-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1704 lines (1318 loc) · 455 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.3.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
<link rel="mask-icon" href="/images/logo.svg" color="#222">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">
<script id="hexo-configurations">
var NexT = window.NexT || {};
var CONFIG = {"hostname":"example.com","root":"/","scheme":"Muse","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":false,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":false},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}}};
</script>
<meta property="og:type" content="website">
<meta property="og:title" content="lagrange's blog">
<meta property="og:url" content="http://example.com/index.html">
<meta property="og:site_name" content="lagrange's blog">
<meta property="og:locale" content="zh_CN">
<meta property="article:author" content="lagrange">
<meta name="twitter:card" content="summary">
<link rel="canonical" href="http://example.com/">
<script id="page-configurations">
// https://hexo.io/docs/variables.html
CONFIG.page = {
sidebar: "",
isHome : true,
isPost : false,
lang : 'zh-CN'
};
</script>
<title>lagrange's blog</title>
<noscript>
<style>
.use-motion .brand,
.use-motion .menu-item,
.sidebar-inner,
.use-motion .post-block,
.use-motion .pagination,
.use-motion .comments,
.use-motion .post-header,
.use-motion .post-body,
.use-motion .collection-header { opacity: initial; }
.use-motion .site-title,
.use-motion .site-subtitle {
opacity: initial;
top: initial;
}
.use-motion .logo-line-before i { left: initial; }
.use-motion .logo-line-after i { right: initial; }
</style>
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage">
<div class="container use-motion">
<div class="headband"></div>
<header class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-container">
<div class="site-nav-toggle">
<div class="toggle" aria-label="切换导航栏">
<span class="toggle-line toggle-line-first"></span>
<span class="toggle-line toggle-line-middle"></span>
<span class="toggle-line toggle-line-last"></span>
</div>
</div>
<div class="site-meta">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<h1 class="site-title">lagrange's blog</h1>
<span class="logo-line-after"><i></i></span>
</a>
<p class="site-subtitle" itemprop="description">what's dead may never die</p>
</div>
<div class="site-nav-right">
<div class="toggle popup-trigger">
</div>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="main-menu menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档<span class="badge">14</span></a>
</li>
</ul>
</nav>
</div>
</header>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
<span>0%</span>
</div>
<main class="main">
<div class="main-inner">
<div class="content-wrap">
<div class="content index posts-expand">
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/03/24/%E8%BF%9B%E7%A8%8B%E7%AE%A1%E7%90%86/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/03/24/%E8%BF%9B%E7%A8%8B%E7%AE%A1%E7%90%86/" class="post-title-link" itemprop="url">hardCore 进程管理</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-03-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-03-24T10:47:34Z">2021-03-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:31:31" itemprop="dateModified" datetime="2021-04-23T00:31:31Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2 id="整体设计"><a href="#整体设计" class="headerlink" title="整体设计"></a>整体设计</h2><p>整个hardCore的进程部分由uCore扩展而来,我们主要扩展的目标有以下几个:</p>
<ol>
<li>实现对内核线程的支持,使得用户态可以使用多线程</li>
<li>扩展OS支持的调度算法:<ol>
<li>基于红黑树实现linux2.6版本中经典进程调度器CFS</li>
<li>stride调度器</li>
<li>RR调度器</li>
</ol>
</li>
<li>增加用户态的信号量支持,搭配内核线程在用户态实现生产者消费者,读者写者等经典线程同步算法</li>
<li>提高OS进程的可靠性,在用户态因为爆栈等错误退出的时候不会导致OS崩溃</li>
<li>实现top等用户命令,扩展shell功能支持用户态进程后台执行</li>
</ol>
<p>当前已经实现了内核态进程以及CFS调度器,下文详细的阐述了这两个部分的实现过程,下一阶段主要想实现的内容就是信号量以及用户态程序的支持,从而构建一个完整的OS。</p>
<h2 id="用户态内核进程实现"><a href="#用户态内核进程实现" class="headerlink" title="用户态内核进程实现"></a>用户态内核进程实现</h2><h3 id="linux-实现参考"><a href="#linux-实现参考" class="headerlink" title="linux 实现参考"></a>linux 实现参考</h3><p><a target="_blank" rel="noopener" href="https://stackoverflow.com/questions/28535838/how-stack-or-memory-is-allocated-for-threads-under-the-same-process-in-linux?fileGuid=ZzkLVnaGVeiM443Q">How Stack or memory is allocated for threads under the same process in Linux</a></p>
<p>The current ‘thread’ concept in Linux is the NPTL one. NPTL uses<code>clone()</code>, which wraps<code>sys_clone()</code>. Allocating a stack for a new ‘thread’ is handled in the user space (ie. libc), not in kernel (ie. Linux). A library can allocate a stack using the allocation of choice (eg.<code>malloc</code>) and then call<code>clone()</code>passing this address as the stack (of course, needs to pass the top of the allocated region, since stacks grow downwards on most platforms):</p>
<p>Unlike<code>fork()</code>,<code>clone()</code>allows the child process to share parts of its execution context with the calling process, such as the memory space, the table of file descriptors, and the table of signal handlers….</p>
<p>The main use of<code>clone()</code>is to implement threads: multiple threads of control in a program that run concurrently in a shared memory space.</p>
<p>When the child process is created with<code>clone()</code>, it executes the function<code>fn(arg)</code>…</p>
<p>The child_stack argument specifies the location of the stack used by the child process …</p>
<p>If you want to learn more specific details, open the source of your distro<a target="_blank" rel="noopener" href="https://man7.org/linux/man-pages/man3/pthread_create.3.html?fileGuid=ZzkLVnaGVeiM443Q">pthread_create</a>implementation and get reading.</p>
<p>For example<a target="_blank" rel="noopener" href="https://github.com/lattera/glibc/blob/a2f34833b1042d5d8eeb263b4cf4caaea138c4ad/nptl/pthread_create.c?fileGuid=ZzkLVnaGVeiM443Q">pthread_create.c</a>:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">int</span><br><span class="line">__pthread_create_2_1 (newthread, attr, start_routine, arg)</span><br><span class="line"> ...</span><br><span class="line"> struct pthread *pd = NULL;</span><br><span class="line"> int err = ALLOCATE_STACK (iattr, &pd);</span><br></pre></td></tr></table></figure>
<p>and<a target="_blank" rel="noopener" href="https://github.com/lattera/glibc/blob/a2f34833b1042d5d8eeb263b4cf4caaea138c4ad/nptl/allocatestack.c?fileGuid=ZzkLVnaGVeiM443Q">allocatestack.c</a>:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"># define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)</span><br><span class="line">static int</span><br><span class="line">allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,</span><br><span class="line"> ALLOCATE_STACK_PARMS)</span><br><span class="line">...</span><br></pre></td></tr></table></figure>
<p>You’ll see that stack allocation has some whistles and bells, like caching and reusing stack regions, guard pages, but in the end is just a memory region allocated in user space.</p>
<h3 id="uCore-实现"><a href="#uCore-实现" class="headerlink" title="uCore 实现"></a>uCore 实现</h3><p>在 linux 中,thread 通过 clone 调用实现,但是分配栈空间是用户空间关心的事,比如使用 malloc 申请之类的。</p>
<p>在 uCore 中并没有实现用户态的 malloc,所以暂时无法使用 malloc 在堆区为一个新的进程开辟一个栈空间,但这并非就是无法实现进程。</p>
<p>于是我们换了另外一种方式就是挪用调用 clone 进程的栈空间。linux 默认为每个进程分配 1M 的栈空间,一旦某个进程调用 clone,我们将该调用进程 1M 的栈空间等分为 16 份,取出一块空闲的栈空间分配给该子线程。针对可能出现递归调用 phthread_create 的情况,不断向上找到第一个调用 clone,然后栈被分割成 16 份的进程,从该进程处获取栈空间。</p>
<p><img src="https://uploader.shimo.im/f/hAtGzepqeSu08AhO.png!thumbnail?fileGuid=ZzkLVnaGVeiM443Q" alt="图片"></p>
<p>子线程退出时会向该进程归还所使用的栈空间,所以包含该第一个调用 clone 的进程在内(下文统一称为祖宗线程),同一时间进程里可以存在 16 个线程。</p>
<h4 id="PCB-支持"><a href="#PCB-支持" class="headerlink" title="PCB 支持"></a>PCB 支持</h4><p>在 PCB 中增加以下三个变量用于支持内核线程</p>
<table>
<thead>
<tr>
<th align="left">Type</th>
<th align="left">name</th>
<th align="left">meaning</th>
</tr>
</thead>
<tbody><tr>
<td align="left">int</td>
<td align="left">is_thread</td>
<td align="left">标志该进程是否是一个子线程</td>
</tr>
<tr>
<td align="left">int</td>
<td align="left">stack_num</td>
<td align="left">标志该子线程占用了父进程的哪一个栈帧,is_thread = 1 才有效</td>
</tr>
<tr>
<td align="left">int [ ]</td>
<td align="left">stack[MAX_THREAD]</td>
<td align="left">每个主进程能够开启 16 个线程(包括主线程(自己)在内),每个块为 0 表示该块的栈没有被占用,不为 0 表示被占用,且值是该子线程的 pid</td>
</tr>
</tbody></table>
<p>特别说明只有祖宗线程和普通进程 is_thread 为 0,其他子线程该值都是 1。</p>
<h4 id="clone-调用"><a href="#clone-调用" class="headerlink" title="clone 调用"></a>clone 调用</h4><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br></pre></td><td class="code"><pre><span class="line">int do_clone(void *(*fn)(void *), void *arg, void (*exit)(int))</span><br><span class="line">{</span><br><span class="line"> int ret = -E_NO_FREE_PROC;</span><br><span class="line"> struct proc_struct *proc;</span><br><span class="line"> if (nr_process >= MAX_PROCESS)</span><br><span class="line"> goto fork_out;</span><br><span class="line"> ret = -E_NO_MEM;</span><br><span class="line"> // 新建一个空进程描述符</span><br><span class="line"> if ((proc = alloc_proc()) == NULL)</span><br><span class="line"> goto fork_out;</span><br><span class="line"> // 设置线程名称为 父线程名称-t</span><br><span class="line"> int i = 0;</span><br><span class="line"> for (i = 0; current->name[i] != '\0'; i++)</span><br><span class="line"> proc->name[i] = current->name[i];</span><br><span class="line"> proc->name[i] = '-';</span><br><span class="line"> proc->name[i + 1] = 't';</span><br><span class="line"> proc->name[i + 2] = '\0';</span><br><span class="line"> proc->is_thread = 1; //标志该进程是一个子线程</span><br><span class="line"> // 针对可能出现递归调用pthread_create的情况,找到不为线程的主进程</span><br><span class="line"> struct proc_struct *father = current;</span><br><span class="line"> while (father->is_thread)</span><br><span class="line"> father = father->parent;</span><br><span class="line"> // 如果不设置线程归属于调用clone的线程,直接指向主线程会导致子线程中没法调用join来等待</span><br><span class="line"> proc->parent = current;</span><br><span class="line"> // 在主线程里面找一块栈分配给该子线程</span><br><span class="line"> proc->stack_num = 1;</span><br><span class="line"> for (; proc->stack_num < MAX_THREAD; proc->stack_num++)</span><br><span class="line"> if (father->stack[proc->stack_num] == 0)</span><br><span class="line"> {</span><br><span class="line"> father->stack[proc->stack_num] = 1;</span><br><span class="line"> break;</span><br><span class="line"> }</span><br><span class="line"> assert(proc->stack_num != MAX_THREAD);</span><br><span class="line"> assert(current->wait_state == 0);</span><br><span class="line"> // 设置内核栈</span><br><span class="line"> if (setup_kstack(proc) != 0)</span><br><span class="line"> goto bad_fork_cleanup_proc;</span><br><span class="line"> // 文件系统直接指向父进程</span><br><span class="line"> if (copy_fs(CLONE_FS, proc) != 0)</span><br><span class="line"> goto bad_fork_cleanup_kstack;</span><br><span class="line"> // mm 直接指向父进程, 一个用户进程有1MB的栈空间</span><br><span class="line"> // 256页,一个线程给16页,包括原有的主线程的话,能开16个线程</span><br><span class="line"> if (copy_mm(CLONE_VM, proc) != 0)</span><br><span class="line"> goto bad_fork_cleanup_fs;</span><br><span class="line"> // !! 注意这个地址不是这个线程的地址,是上一个线程栈的栈底地址</span><br><span class="line"> // 给线程分配栈,一个进程16页</span><br><span class="line"> uint32_t thread_stack_top = USTACKTOP - (proc->stack_num) * 16 * PGSIZE;</span><br><span class="line"> // 预先给两页给新分配的线程</span><br><span class="line"> assert(pgdir_alloc_page(proc->mm->pgdir, thread_stack_top - PGSIZE, PTE_USER) != NULL);</span><br><span class="line"> assert(pgdir_alloc_page(proc->mm->pgdir, thread_stack_top - 2 * PGSIZE, PTE_USER) != NULL);</span><br><span class="line"> proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - 1;</span><br><span class="line"> struct trapframe *tf = proc->tf;</span><br><span class="line"> memset(tf, 0, sizeof(struct trapframe));</span><br><span class="line"> tf->tf_cs = USER_CS;</span><br><span class="line"> tf->tf_ds = tf->tf_es = tf->tf_ss = USER_DS;</span><br><span class="line"> // 把栈往上抬4个字节,才开始放东西,否则栈会越界</span><br><span class="line"> // 先放exit的参数为0</span><br><span class="line"> *(uint32_t *)(thread_stack_top - 1 * sizeof(uint32_t)) = (uint32_t)0;</span><br><span class="line"> // 压入线程函数的参数地址</span><br><span class="line"> *(uint32_t *)(thread_stack_top - 2 * sizeof(uint32_t)) = (uint32_t)arg;</span><br><span class="line"> // 压入上一个函数的返回地址为 exit ,保证函数结束并没有显式调用 exit 时系统能帮助该线程退出。</span><br><span class="line"> *(uint32_t *)(thread_stack_top - 3 * sizeof(uint32_t)) = exit;</span><br><span class="line"> tf->tf_esp = thread_stack_top - 3 * sizeof(uint32_t);</span><br><span class="line"> // 设置 eip 指向当前函数开始执行</span><br><span class="line"> tf->tf_eip = fn;</span><br><span class="line"> tf->tf_eflags = FL_IF;</span><br><span class="line"> ret = 0;</span><br><span class="line"> tf->tf_regs.reg_eax = 0;</span><br><span class="line"> tf->tf_eflags |= FL_IF;</span><br><span class="line"> // context 在调度的时候会弹出 tf 内的寄存器,恢复程序执行</span><br><span class="line"> proc->context.eip = (uintptr_t)forkret;</span><br><span class="line"> proc->context.esp = (uintptr_t)(proc->tf);</span><br><span class="line"> bool intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> proc->pid = get_pid();</span><br><span class="line"> hash_proc(proc);</span><br><span class="line"> set_links(proc);</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"> wakeup_proc(proc);</span><br><span class="line"> // 重新设置父线程的栈被该子线程占用</span><br><span class="line"> father->stack[proc->stack_num] = proc->pid;</span><br><span class="line"> ret = proc->pid;</span><br><span class="line">fork_out:</span><br><span class="line"> return ret;</span><br><span class="line">bad_fork_cleanup_fs: //for LAB8</span><br><span class="line"> put_fs(proc);</span><br><span class="line">bad_fork_cleanup_kstack:</span><br><span class="line"> put_kstack(proc);</span><br><span class="line">bad_fork_cleanup_proc:</span><br><span class="line"> kfree(proc);</span><br><span class="line"> goto fork_out;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>注意因为用户有可能不会在线程执行的函数结束返回时调用 exit,所以 clone 的一个任务就是帮助用户将线程栈栈底的返回地址设置为 sys_exit (不是直接 do_exit 因为该线程执行在用户态,只有通过系统调用才能执行内核态代码)<br>最后的线程栈栈帧如图所示</p>
<p><img src="https://uploader.shimo.im/f/lKtHA4uomBRs6JTi.png!thumbnail?fileGuid=ZzkLVnaGVeiM443Q" alt="图片"></p>
<h4 id="祖宗线程-exit-时守护"><a href="#祖宗线程-exit-时守护" class="headerlink" title="祖宗线程 exit 时守护"></a>祖宗线程 exit 时守护</h4><p>若祖宗线程没有调用 join 来等待子线程结束工作,祖宗线程会直接退出,虽然这样内存空间的引用计数不为 0,不会释放他们共用的内存空间,剩余线程还能继续执行,但是所有栈的分配信息都存在祖宗线程的 PCB 表中,如果祖宗线程一旦释放,子进程再想调用 phthread_create 新建子进程会导致失败。于是必须在祖宗线程调用 sys_exit 时守护所有子线程退出后祖宗线程才能退出。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">static int</span><br><span class="line">sys_exit(uint32_t arg[]) {</span><br><span class="line"> int error_code = (int)arg[0];</span><br><span class="line"> if (is_ancestral_thread(current)) //祖宗进程只有在所有子线程全部释放后才能退出</span><br><span class="line"> while (current_have_kid()) //只要有儿子就等着</span><br><span class="line"> do_wait(0, NULL);</span><br><span class="line"> return do_exit(error_code);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h4 id="线程一致性维护"><a href="#线程一致性维护" class="headerlink" title="线程一致性维护"></a>线程一致性维护</h4><p>linux 上的线程就是基于轻量级进程, 由用户态的 pthread 库实现的.使用 pthread 以后, 在用户看来, 每一个 task_struct 就对应一个线程, 而一组线程以及它们所共同引用的一组资源就是一个进程。但是, 一组线程并不仅仅是引用同一组资源就够了, 它们还必须被视为一个整体。当”进程”收到一个致命信号(比如由于段错误收到 SIGSEGV 信号), 对应的这一组 task_struct 将全部退出。这是 POSIX 对线程实现提出的要求,如果某个线程”挂”了, 整个进程还在若无其事地运行着, 可能会出现很多的不一致状态. 进程将不是一个整体, 而线程也不能称为线程。</p>
<p>在 uCore 中我们也实现了该线程的一致性</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line">case T_PGFLT: //page fault</span><br><span class="line"> if ((ret = pgfault_handler(tf)) != 0) {</span><br><span class="line"> print_trapframe(tf);</span><br><span class="line"> if (current == NULL) {</span><br><span class="line"> panic("handle pgfault failed. ret=%d\n", ret);</span><br><span class="line"> }</span><br><span class="line"> else {</span><br><span class="line"> if (trap_in_kernel(tf)) {</span><br><span class="line"> panic("handle pgfault failed in kernel mode. ret=%d\n", ret);</span><br><span class="line"> }</span><br><span class="line"> cprintf("killed by kernel.\n");</span><br><span class="line"> // panic("handle user mode pgfault failed. ret=%d\n", ret);</span><br><span class="line"> kill_all_thread();</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> break;</span><br></pre></td></tr></table></figure>
<p>比如有一个线程缺页异常的话,对应的这组进程都会被杀死。</p>
<h2 id="CFS进程调度实现"><a href="#CFS进程调度实现" class="headerlink" title="CFS进程调度实现"></a>CFS进程调度实现</h2><p>其他两个调度器的实现比较简单,这里主要研究CFS调度器是如何实现的</p>
<h3 id="CFS调度器介绍"><a href="#CFS调度器介绍" class="headerlink" title="CFS调度器介绍"></a>CFS调度器介绍</h3><p>cfs 定义了一种新的模型,它给 cfs_rq(cfs的run queue)中的每一个进程安排一个虚拟时钟,vruntime。如果一个进程得以执行,随着时间的增长(也就是一个个tick的到来),其vruntime将不断增大。没有得到执行的进程vruntime不变。而调度器总是选择vruntime跑得最慢的那个进程来执行。这就是所谓的“完全公平”。为了区别不同优先级的进程,优先级高的进程vruntime增长得慢,以至于它可能得到更多的运行机会。</p>
<p>如果分配给进程的运行时间不等于实际运行的时间时:CFS的思想就是让每个调度实体的vruntime增加速度不同,权重越大的增加的越慢,这样高优先级进程就能获得更多的cpu执行时间,而vruntime值较小者也得到执行。</p>
<p>每一个进程或者调度组都对应一个调度的实体,每一个进程都通过调度实体与CFS运行对列建立联系,每次进行CFS调度的时候都会在CFS运行对列红黑树中选择一个进程(vruntime值较小者)。cfs_rq代表CFS运行对列,它可以找到对应的红黑树。进程task_struct ,可以找到对应的调度实体。调度实体sched_entity对应运行对列红黑树上的一个节点。</p>
<h3 id="调度器实现"><a href="#调度器实现" class="headerlink" title="调度器实现"></a>调度器实现</h3><p>运行队列中的进程/线程会频繁进行插入和删除。为了效率考虑,CFS调度队列我们需要使用红黑树来实现,每次选择最小vruntime的节点执行(即选择这颗红黑树上的最左节点)将其从这颗红黑树中删除,调度到CPU上执行。因为C语言中没有提供面向对象的编程方法,我们借用linux内核中提供的红黑树的基础库来实现我们自己的cfs红黑树。简单介绍一下linux内核里的红黑树。</p>
<p>Linux有很多地方用到了红黑树,比如高精度计时器使用红黑树树组织定时请求,EXT3文件系统也使用红黑树树来管理目录,虚拟存储管理系统也有用红黑树树进行VMAs(Virtual Memory Areas)的管理。Linux内核红黑树的实现与传统的实现方式有些不同,它对针对内核对速度的需要做了优化。每一个rb_node节点是嵌入在用RB树进行组织的数据结构中,而不是用rb_node指针进行数据结构的组织。对于CFS运行队列这颗红黑树的节点 cfs_node,我们只要在里面内嵌一个 rb_node 节点再加一个指向该进程PCB的指针,我们就可以利用linux提供的红黑树操作函数来封装出一个我们自己的CFS调度队列。</p>
<p>对于一个挂在CFS队列里面的进程,我们需要实现的方法有</p>
<figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#<span class="meta-keyword">ifndef</span> KERN_SCHEDULE_CFS_RBTREE_H</span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> KERN_SCHEDULE_CFS_RBTREE_H</span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">include</span></span></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span></span></span><br><span class="line"><span class="class">{</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">rb_node</span> <span class="title">node</span>;</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">proc</span>;</span></span><br><span class="line">};</span><br><span class="line"><span class="function">struct cfs_node *<span class="title">cfs_search</span><span class="params">(struct rb_root *root, struct proc_struct *proc)</span></span>;</span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">cfs_insert</span><span class="params">(struct rb_root *root, struct proc_struct *proc)</span></span>;</span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">cfs_node_free</span><span class="params">(struct cfs_node *node)</span></span>;</span><br><span class="line"><span class="function">struct proc_struct * <span class="title">cfs_find_min</span><span class="params">(struct rb_root *root)</span></span>;</span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">compare_cfs_node</span><span class="params">(struct proc_struct *a, struct proc_struct *b)</span></span>;</span><br><span class="line"><span class="meta">#<span class="meta-keyword">endif</span></span></span><br></pre></td></tr></table></figure>
<p>一个节点的插入和删除,找最小(就是找最左子节点),还有节点值的大小比较。这里值得关注的函数就是节点值大小比较。在 linux 中的红黑树中,若两个节点的 value 大小相等会放弃插入,但是连续插入的两个 vruntime 很可能相同,针对这种情况,重新规定红黑树的排序规则</p>
<p>vruntime 不同时比较 vruntime ,vruntime大者大。vruntime 相同时比较 pid ,pid大者大. 因为树里面不可能有两个相同的进程,所以该排序可以保证线序关系。</p>
<p>下面简单的去实现这个红黑树节点的方法</p>
<figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">compare_cfs_node</span><span class="params">(struct proc_struct *a, struct proc_struct *b)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="keyword">if</span> (a->vruntime != b->vruntime)</span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">if</span> (a->vruntime > b->vruntime)</span><br><span class="line"> <span class="keyword">return</span> <span class="number">1</span>;</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">if</span> (a->pid > b->pid)</span><br><span class="line"> <span class="keyword">return</span> <span class="number">1</span>;</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"><span class="function">struct cfs_node *<span class="title">cfs_search</span><span class="params">(struct rb_root *root, struct proc_struct *proc)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">rb_node</span> *<span class="title">node</span> =</span> root->rb_node;</span><br><span class="line"> <span class="keyword">while</span> (node)</span><br><span class="line"> {</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span> *<span class="title">data</span> =</span> container_of(node, struct cfs_node, node);</span><br><span class="line"> <span class="keyword">if</span> (data->proc->pid == proc->pid)</span><br><span class="line"> <span class="keyword">return</span> data;</span><br><span class="line"> <span class="keyword">if</span> (compare_cfs_node(data->proc, proc))</span><br><span class="line"> node = node->rb_left;</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> node = node->rb_right;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> <span class="literal">NULL</span>;</span><br><span class="line">}</span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">cfs_insert</span><span class="params">(struct rb_root *root, struct proc_struct *proc)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span> *<span class="title">data</span> =</span> (struct cfs_node *)kmalloc(<span class="keyword">sizeof</span>(struct cfs_node));</span><br><span class="line"> data->proc = proc;</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">rb_node</span> **<span class="title">new</span> =</span> &(root->rb_node), *parent = <span class="literal">NULL</span>;</span><br><span class="line"> <span class="comment">/* Figure out where to put new node */</span> </span><br><span class="line"> <span class="keyword">while</span> (<span class="keyword">new</span>)</span><br><span class="line"> {</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span> <span class="title">this</span> =</span> container_of(<span class="keyword">new</span>, struct cfs_node, node);</span><br><span class="line"> <span class="keyword">int</span> result = compare_cfs_node(data->proc, <span class="keyword">this</span>->proc);</span><br><span class="line"> parent = *<span class="keyword">new</span>;</span><br><span class="line"> <span class="keyword">if</span> (result < <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">new</span> = &((*<span class="keyword">new</span>)->rb_left);</span><br><span class="line"> <span class="keyword">else</span> <span class="keyword">if</span> (result > <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">new</span> = &((*<span class="keyword">new</span>)->rb_right);</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">/* Add new node and rebalance tree. */</span></span><br><span class="line"> rb_link_node(&data->node, parent, <span class="keyword">new</span>);</span><br><span class="line"> rb_insert_color(&data->node, root);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">1</span>;</span><br><span class="line">}</span><br><span class="line"><span class="function">struct proc_struct *<span class="title">cfs_find_min</span><span class="params">(struct rb_root *root)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">rb_node</span> *<span class="title">node</span> =</span> root->rb_node;</span><br><span class="line"> <span class="keyword">if</span> (node == <span class="literal">NULL</span>)</span><br><span class="line"> <span class="keyword">return</span> <span class="literal">NULL</span>;</span><br><span class="line"> <span class="keyword">while</span> (node->rb_left)</span><br><span class="line"> node = node->rb_left;</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span> *<span class="title">data</span> =</span> container_of(node, struct cfs_node, node);</span><br><span class="line"> <span class="keyword">return</span> data->proc;</span><br><span class="line">}</span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">cfs_node_free</span><span class="params">(struct cfs_node *node)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="keyword">if</span> (node != <span class="literal">NULL</span>)</span><br><span class="line"> {</span><br><span class="line"> node->proc = <span class="literal">NULL</span>;</span><br><span class="line"> kfree(node);</span><br><span class="line"> node = <span class="literal">NULL</span>;</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>下面开始实现CFS调度器</p>
<p>首先是调度器初始化函数, 初始化红黑树根节点,并设置调度队列里面的进程数量为0</p>
<figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">cfs_init(struct run_queue *rq)</span><br><span class="line">{</span><br><span class="line"> list_init(&(rq->run_list));</span><br><span class="line"> rq->cfs_rb_tree = RB_ROOT;</span><br><span class="line"> rq->proc_num = <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>之后是插入进程,将进程插入红黑树,并且增加该进程里面的进程数量</p>
<figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">cfs_enqueue(struct run_queue *rq, struct proc_struct *proc)</span><br><span class="line">{</span><br><span class="line"> cfs_insert(&(rq->cfs_rb_tree), proc);</span><br><span class="line"> <span class="keyword">if</span> (proc->time_slice == <span class="number">0</span> || proc->time_slice > rq->max_time_slice)</span><br><span class="line"> proc->time_slice = rq->max_time_slice;</span><br><span class="line"> proc->rq = rq;</span><br><span class="line"> rq->proc_num++;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>然后是把一个待运行的进程从运行队列里面出队,根据PCB找到CFS节点数据结构的位置。销毁创建的CFS节点,减少进程计数。</p>
<figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">cfs_dequeue(struct run_queue *rq, struct proc_struct *proc)</span><br><span class="line">{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">cfs_node</span> *<span class="title">data</span> =</span> cfs_search(&(rq->cfs_rb_tree), proc);</span><br><span class="line"> <span class="keyword">if</span> (data)</span><br><span class="line"> {</span><br><span class="line"> rb_erase(&data->node, &(rq->cfs_rb_tree));</span><br><span class="line"> cfs_node_free(data);</span><br><span class="line"> rq->proc_num--;</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>然后是每一次时钟中断的调用,这里是我们实现的时候与 linux 实现不一样的地方。在标准linux 的实现中,若当前运行的 proc 的 vruntime 已经不是队列里面最小的了(实际上会导致频繁调度,所以一般设置一个阈值)超过这个限制会直接触发调度,但是我们为了简单和一定程度上的效率考虑只是通过 vruntime 进行选择,而不是使用 vruntime 进行抢占调度。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line">static void</span><br><span class="line">cfs_proc_tick(struct run_queue *rq, struct proc_struct *proc)</span><br><span class="line">{</span><br><span class="line"> // 实际上CFS的实现:若 proc->vruntime 已经不是队列里面最小的了(实际上会导致频繁调度,所以一般设置一个阈值)</span><br><span class="line"> // 超过这个限制会直接触发调度</span><br><span class="line"> // 此时的进程不在红黑树中,修改 proc->vruntime 不会破坏红黑树的性质。 但是还是需要完善上述 CFS</span><br><span class="line"> // 仍有运行时间则减少其运行时间,计算虚拟运行时间</span><br><span class="line"> assert(proc->cfs_prior != 0);</span><br><span class="line"> proc->vruntime += proc->cfs_prior;</span><br><span class="line"> if (proc->time_slice > 0)</span><br><span class="line"> proc->time_slice--;</span><br><span class="line"> // 时间片用完则标记该进程需要调度</span><br><span class="line"> if (proc->time_slice == 0)</span><br><span class="line"> proc->need_resched = 1;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h2 id="对一些问题的研究"><a href="#对一些问题的研究" class="headerlink" title="对一些问题的研究"></a>对一些问题的研究</h2><h3 id="用户态进程是如何退出的"><a href="#用户态进程是如何退出的" class="headerlink" title="用户态进程是如何退出的"></a>用户态进程是如何退出的</h3><p>以 stackOverflow.c 这个程序为例, 在编译的时候总共涉及 gcc 的调用和 ld 的链接操作</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">gcc -Iuser/ -fno-builtin -fno-PIC -Wall -ggdb -m32 -gstabs -nostdinc -fno-stack-protector -Ilibs/ -Iuser/include/ -Iuser/libs/ -c user/stackOverFlow.c -o obj/user/stackOverFlow.o</span><br><span class="line">ld -m elf_i386 -nostdlib -T tools/user.ld -o obj/__user_stackOverFlow.out obj/user/libs/panic.o obj/user/libs/syscall.o obj/user/libs/ulib.o obj/user/libs/semaphore.o obj/user/libs/initcode.o obj/user/libs/file.o obj/user/libs/stdio.o obj/user/libs/dir.o obj/user/libs/umain.o obj/user/libs/pthread.o obj/user/libs/proc.o obj/libs/rbtree.o obj/libs/string.o obj/libs/printfmt.o obj/libs/hash.o obj/libs/rand.o obj/user/stackOverFlow.o</span><br></pre></td></tr></table></figure>
<p>之所以选这个程序是因为简单,使用 objdump 反汇编第一条 gcc 编译指令执行完毕生成的 stackOverFlow.o 文件的 text 段如下所示:<br>注意现在还没有到链接的阶段,所有 text 段代码的起始位置都是 0 地址处:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><span class="line">Disassembly of section .text:</span><br><span class="line">00000000 <loop>:</span><br><span class="line"> 0: f3 0f 1e fb endbr32</span><br><span class="line"> 4: 55 push %ebp</span><br><span class="line"> 5: 89 e5 mov %esp,%ebp</span><br><span class="line"> 7: 83 ec 08 sub $0x8,%esp</span><br><span class="line"> a: e8 fc ff ff ff call b <loop+0xb></span><br><span class="line"> f: 90 nop</span><br><span class="line"> 10: c9 leave</span><br><span class="line"> 11: c3 ret</span><br><span class="line">00000012 <main>:</span><br><span class="line"> 12: f3 0f 1e fb endbr32</span><br><span class="line"> 16: 55 push %ebp</span><br><span class="line"> 17: 89 e5 mov %esp,%ebp</span><br><span class="line"> 19: 83 e4 f0 and $0xfffffff0,%esp</span><br><span class="line"> 1c: e8 fc ff ff ff call 1d <main+0xb></span><br><span class="line"> 21: b8 00 00 00 00 mov $0x0,%eax</span><br><span class="line"> 26: c9 leave</span><br><span class="line"> 27: c3 ret</span><br></pre></td></tr></table></figure>
<p>下一条链接指令会将 user/lib 下的所有 .o 文件与 stackOverFlow.o 一起进行链接操作<br>其中链接的 obj/user/libs/initcode.o 是一段汇编代码</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line">.text</span><br><span class="line">.globl _start</span><br><span class="line">_start:</span><br><span class="line"> # set ebp for backtrace</span><br><span class="line"> movl $0x0, %ebp</span><br><span class="line"> # load argc and argv</span><br><span class="line"> movl (%esp), %ebx</span><br><span class="line"> lea 0x4(%esp), %ecx</span><br><span class="line"></span><br><span class="line"> # move down the esp register</span><br><span class="line"> # since it may cause page fault in backtrace</span><br><span class="line"> subl $0x20, %esp</span><br><span class="line"> # save argc and argv on stack</span><br><span class="line"> pushl %ecx</span><br><span class="line"> pushl %ebx</span><br><span class="line"> # call user-program function</span><br><span class="line"> call umain</span><br><span class="line">1: jmp 1b</span><br></pre></td></tr></table></figure>
<p>结合链接脚本中的<code>ENTRY(_start)</code>可知在链接完成后 initcode.o 是最先被执行的,该段汇编指令调用了 umain 函数,定义在 umain.o 里。<br>umain 打开了输入输出流两个文件描述符,并调用函数 main (注意:umain 上面的 main 函数只是声明,在链接的时候会把 stackOverFlow.o 里面的 main 函数实现给链接上,<strong>main 函数执行完成后 umain 调用系统调用 exit 退出</strong>)</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line">int main(int argc, char *argv[]);</span><br><span class="line">static int</span><br><span class="line">initfd(int fd2, const char *path, uint32_t open_flags) {</span><br><span class="line"> int fd1, ret;</span><br><span class="line"> if ((fd1 = open(path, open_flags)) < 0) {</span><br><span class="line"> return fd1;</span><br><span class="line"> }</span><br><span class="line"> if (fd1 != fd2) {</span><br><span class="line"> close(fd2);</span><br><span class="line"> ret = dup2(fd1, fd2);</span><br><span class="line"> close(fd1);</span><br><span class="line"> }</span><br><span class="line"> return ret;</span><br><span class="line">}</span><br><span class="line">void</span><br><span class="line">umain(int argc, char *argv[]) {</span><br><span class="line"> int fd;</span><br><span class="line"> if ((fd = initfd(0, "stdin:", O_RDONLY)) < 0) {</span><br><span class="line"> warn("open <stdin> failed: %e.\n", fd);</span><br><span class="line"> }</span><br><span class="line"> if ((fd = initfd(1, "stdout:", O_WRONLY)) < 0) {</span><br><span class="line"> warn("open <stdout> failed: %e.\n", fd);</span><br><span class="line"> }</span><br><span class="line"> int ret = main(argc, argv);</span><br><span class="line"> exit(ret);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>截取几段链接完成后的反汇编代码</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">Disassembly of section .text:</span><br><span class="line">00800020 <__panic>:</span><br><span class="line">#include <stdio.h></span><br><span class="line">#include <ulib.h></span><br><span class="line">#include <error.h></span><br><span class="line">void</span><br><span class="line">__panic(const char *file, int line, const char *fmt, ...) {</span><br><span class="line"> 800020: f3 0f 1e fb endbr32</span><br><span class="line"> 800024: 55 push %ebp</span><br><span class="line"> 800025: 89 e5 mov %esp,%ebp</span><br><span class="line"> 800027: 83 ec 18 sub $0x18,%esp</span><br></pre></td></tr></table></figure>
<p>可以看到代码段真如 ld 脚本规定的从 0x00800020 地址处开始</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">0080066b <_start>:</span><br><span class="line">.text</span><br><span class="line">.globl _start</span><br><span class="line">_start:</span><br><span class="line"> # set ebp for backtrace</span><br><span class="line"> movl $0x0, %ebp</span><br><span class="line"> 80066b: bd 00 00 00 00 mov $0x0,%ebp</span><br><span class="line"> # load argc and argv</span><br><span class="line"> movl (%esp), %ebx</span><br><span class="line"> 800670: 8b 1c 24 mov (%esp),%ebx</span><br><span class="line"> lea 0x4(%esp), %ecx</span><br><span class="line"> 800673: 8d 4c 24 04 lea 0x4(%esp),%ecx</span><br></pre></td></tr></table></figure>
<p>最后操作系统在执行这段代码的时候会根据 elf 的格式将 eip 设置为 0x0080066b 从这开始执行。</p>
<h3 id="为何代码不从-0-地址开始"><a href="#为何代码不从-0-地址开始" class="headerlink" title="为何代码不从 0 地址开始"></a>为何代码不从 0 地址开始</h3><p>The stack, which is usually quite small but could grow quite dramatically in some occasions. The stack grows down, and when the stack is full, we really want the process to predictably crash rather than overwriting some data. So there had to be a wide area for the stack, with, at the low end of that area, an unmapped page. And lo! There is an unmapped page at address zero, to catch null pointer dereferences. Hence it was defined that the stack would get the first 128 MB of address space, except for the first page. This means that the code had to go after those 128 MB, at an address similar to 0x080xxxxx.</p>
<p>As Michael points out, “losing” 128 MB of address space was no big deal because the address space was very large with regards to what could be actually used. At that time, the Linux kernel was limiting the address space for a single process to 1 GB, over a maximum of 4 GB allowed by the hardware, and that was not considered to be a big issue.</p>
<h3 id="第一个用户进程-sh"><a href="#第一个用户进程-sh" class="headerlink" title="第一个用户进程 sh"></a>第一个用户进程 sh</h3><p>第一个用户进程是从<code>int pid = kernel_thread(user_main, NULL, 0);</code>而来。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">int</span><br><span class="line">kernel_thread(int (*fn)(void *), void *arg, uint32_t clone_flags) {</span><br><span class="line"> struct trapframe tf;</span><br><span class="line"> memset(&tf, 0, sizeof(struct trapframe));</span><br><span class="line"> tf.tf_cs = KERNEL_CS;</span><br><span class="line"> tf.tf_ds = tf.tf_es = tf.tf_ss = KERNEL_DS;</span><br><span class="line"> tf.tf_regs.reg_ebx = (uint32_t)fn;</span><br><span class="line"> tf.tf_regs.reg_edx = (uint32_t)arg;</span><br><span class="line"> tf.tf_eip = (uint32_t)kernel_thread_entry;</span><br><span class="line"> return do_fork(clone_flags | CLONE_VM, 0, &tf);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>送了这个即将执行的 user_main 函数一个内核栈帧和一个 kernel_thread_entry 函数(该函数定义 user_main 执行完成后会自动调用 do_wait) 然后就把 user_main 进程给 fork 出来了(此时还在内核态)。<br>最后 user_main 进程执行了 user_main 函数,引发一个中断调用 exec,把这个进程掏空,该进程变成了进程 sh ,注意在从文件系统载入代码的 load_icode 函数中,在中断栈帧中把 user_main 的段子换成了用户态的段子,只要中断一返回,该进程回到了用户态</p>
<p>虽然上面定义了 user_main 执行完成后会自动调用 do_wait ,但一个 exec 调用过后, eip 指针的值已经被修改,sh 已经和 kernel_thread_entry 没关系了。 所以最后 sh 若要退出需要通过中断调用(用户进程也没法直接执行 do_wait), 具体如何进行的中断调用仍需要研究。</p>
<h3 id="对-top-指令的结果分析"><a href="#对-top-指令的结果分析" class="headerlink" title="对 top 指令的结果分析"></a>对 top 指令的结果分析</h3><p><img src="https://uploader.shimo.im/f/DlKvoeiCUVwBtK9l.png!thumbnail?fileGuid=ZzkLVnaGVeiM443Q" alt="图片"></p>
<p>进程数量等于 os 内的全局变量 nr_process 这个变量包含了 idle 进程所以总共 4 个没问题。</p>
<p>init 进程从其创建,第一次执行 do_wait 的时候,就因为其有 user_main(该进程创造 sh 后结束) 这个进程而陷入了 sleep ,只要其子进程(sh)不死(就是变 Zombie,sleep 都没用),永远不会唤醒该 init 进程来回收资源(其他父线程托孤的时候可能唤醒 init)。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">while (do_wait(0, NULL) == 0) {</span><br><span class="line"> schedule();</span><br><span class="line">}</span><br><span class="line">if (haskid) {</span><br><span class="line"> current->state = PROC_SLEEPING;</span><br><span class="line"> current->wait_state = WT_CHILD;</span><br><span class="line"> schedule();</span><br><span class="line"> if (current->flags & PF_EXITING) {</span><br><span class="line"> do_exit(-E_KILLED);</span><br><span class="line"> }</span><br><span class="line"> goto repeat;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>idle 的调度是被特判过的, 运行队列里面没有 idleproc , OS 在没有进程可供调度的时候选择进程 idleproc,自然其 vruntime 不会被计算</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">static void</span><br><span class="line">sched_class_proc_tick(struct proc_struct *proc) {</span><br><span class="line"> if (proc != idleproc) {</span><br><span class="line"> sched_class->proc_tick(rq, proc);</span><br><span class="line"> }</span><br><span class="line"> else {</span><br><span class="line"> proc->need_resched = 1;</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>sh 是在等待输入的函数<code>dev_stdin_read</code>主动调用<code>schedule()</code>放弃时间片,所以 在间隔两次输入<code>top</code>指令的中间,sh 一直在等待输入,并没有被调度,所以两次间隔后 idle 的调度次数会巨多因为除了 idle ,ucore 没有其他进程可供调度,但 vruntime 是 0(因为特判),sh 的 vruntime 可能会有小幅 提升因为调用了 top ,中间处理会给 sh 时间,但一旦 top 开始执行, sh 又会因为等 top 执行完毕被挂起,top 执行完成后又在等输入。。。。</p>
<h3 id="对用户栈的研究"><a href="#对用户栈的研究" class="headerlink" title="对用户栈的研究"></a>对用户栈的研究</h3><p>每个用户进程都设置了用户栈为,栈顶是 USTACKTOP - 1 (第一个地址不是),栈大小是 USTACKSIZE 。调用 mm_mmap 函数建立用户栈的 vma 结构,明确用户栈的位置在用户虚空间的顶端,大小为 256 个页,即 1MB。但这个只是设置了 vma 。下面调用 pgdir_alloc_page 分配了 4 页(16KB)的栈空间给用户。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">vm_flags = VM_READ | VM_WRITE | VM_STACK;</span><br><span class="line">if ((ret = mm_map(mm, USTACKTOP - USTACKSIZE, USTACKSIZE, vm_flags, NULL)) != 0) {</span><br><span class="line"> goto bad_cleanup_mmap;</span><br><span class="line">}</span><br><span class="line">assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-PGSIZE , PTE_USER) != NULL);</span><br><span class="line">assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-2*PGSIZE , PTE_USER) != NULL);</span><br><span class="line">assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-3*PGSIZE , PTE_USER) != NULL);</span><br><span class="line">assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-4*PGSIZE , PTE_USER) != NULL);</span><br></pre></td></tr></table></figure>
<p>一旦这 4KB 的栈被用完了,那么就会引起缺页异常。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">struct vma_struct *vma = find_vma(mm, addr);</span><br><span class="line">if (vma == NULL || vma->vm_start > addr) {</span><br><span class="line"> cprintf("not valid addr %x, and can not find it in vma\n", addr);</span><br><span class="line"> goto failed;</span><br><span class="line"> }</span><br><span class="line">ptep = get_pte(mm->pgdir, addr, 1))</span><br><span class="line">pgdir_alloc_page(mm->pgdir, addr, perm)</span><br></pre></td></tr></table></figure>
<p>因为 vma 里写的是 1MB 空间,实际只分配给了 16KB ,缺页的时候只要地址在 1MB 的范围内,那么 通过缺页的地址是找得到对应的 vma 的,说明这个是个合法地址,os 会自动根据缺页地址新建页表项并分配 page 。当栈的使用超过 1MB,就会找不到 vma 直接报错,这个行为目前会直接崩掉内核,可用考虑后续的优化。</p>
<h3 id="时钟中断"><a href="#时钟中断" class="headerlink" title="时钟中断"></a>时钟中断</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">//设置时钟每秒中断100次</span><br><span class="line"> outb(IO_TIMER1, TIMER_DIV(100) % 256);</span><br><span class="line"> outb(IO_TIMER1, TIMER_DIV(100) / 256);</span><br><span class="line">// 通过中断控制器使能时钟中断</span><br><span class="line"> pic_enable(IRQ_TIMER);</span><br></pre></td></tr></table></figure>
<p>每 1s 中断 100 次则一个时间片的时长为 10ms , usleep(100) 睡眠 1s。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab3/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab3/" class="post-title-link" itemprop="url">Lab3 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-01-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-01-24T10:47:34Z">2021-01-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:27:29" itemprop="dateModified" datetime="2021-04-23T00:27:29Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="Lab3"><a href="#Lab3" class="headerlink" title="Lab3"></a>Lab3</h1><h2 id="do-pgfault-之前"><a href="#do-pgfault-之前" class="headerlink" title="do_pgfault 之前"></a>do_pgfault 之前</h2><p>当程序访问内存遇上特殊情况时,CPU 会执行第十四号中断处理程序——缺页处理程序来处理。</p>
<p>特殊情况如下</p>
<ol>
<li>写入一个存在物理页的虚拟页——写时复制。</li>
<li>读写一个不存在物理页的虚拟页——缺页。</li>
<li>不满足访问权限。</li>
</ol>
<p>当程序触发缺页中断时,CPU 会把产生异常的线性地址存储在 CR2 寄存器中,并且把页访问异常错误码保存在中断栈中。</p>
<p>其中,页访问异常错误码的位 0 为 1 表示对应物理页不存在;位 1 为 1 表示写异常;位 2 为 1 表示访问权限异常。</p>
<h3 id="中断处理机制"><a href="#中断处理机制" class="headerlink" title="中断处理机制"></a>中断处理机制</h3><p>一直到<code>do_pgfault</code>的函数调用链为</p>
<p><strong>trap–> trap_dispatch–>pgfault_handler–>do_pgfault</strong></p>
<p>首先是在 trap.c 中中断向量表初始化的时候,将 vectors.S 中的所有跳转到__alltraps 的函数作为中断处理程序填写到 idt 表中,并设置中断寄存器 IDT。</p>
<p>完成该操作后,所有的中断会带着中断的描述向量值跳转到 __alltraps 中,这段汇编会进行中断现场的保存和恢复,并建立一个中断栈帧,最后带着栈帧跳转到 trap.c 中的 trap 函数,trap 函数直接调用 trap_dispatch(并传递该栈帧),trap_dispatch 函数包含了一个 case 语句,根据中断号调用 os 中不同的函数进行处理。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br></pre></td><td class="code"><pre><span class="line">vector254:</span><br><span class="line"> pushl $<span class="number">0</span></span><br><span class="line"> pushl $<span class="number">254</span></span><br><span class="line"> jmp __alltraps</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="class"><span class="keyword">struct</span> <span class="title">gatedesc</span> <span class="title">idt</span>[256] =</span> {{<span class="number">0</span>}};</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="class"><span class="keyword">struct</span> <span class="title">pseudodesc</span> <span class="title">idt_pd</span> =</span> {</span><br><span class="line"> <span class="keyword">sizeof</span>(idt) - <span class="number">1</span>, (<span class="keyword">uintptr_t</span>)idt</span><br><span class="line">};</span><br><span class="line"></span><br><span class="line"><span class="keyword">void</span></span><br><span class="line">idt_init(<span class="keyword">void</span>) {</span><br><span class="line"> <span class="keyword">extern</span> <span class="keyword">uintptr_t</span> __vectors[];</span><br><span class="line"> <span class="keyword">int</span> i;</span><br><span class="line"> <span class="keyword">for</span> (i = <span class="number">0</span>; i < <span class="keyword">sizeof</span>(idt) / <span class="keyword">sizeof</span>(struct gatedesc); i ++) {</span><br><span class="line"> SETGATE(idt[i], <span class="number">0</span>, GD_KTEXT, __vectors[i], DPL_KERNEL);</span><br><span class="line"> }</span><br><span class="line"> lidt(&idt_pd);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h2 id="do-pgfault"><a href="#do-pgfault" class="headerlink" title="do_pgfault"></a>do_pgfault</h2><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">do_pgfault</span><span class="params">(struct mm_struct *mm, <span class="keyword">uint32_t</span> error_code, <span class="keyword">uintptr_t</span> addr)</span> </span>{</span><br><span class="line"> <span class="keyword">int</span> ret = -E_INVAL;</span><br><span class="line"> <span class="comment">// 获取触发pgfault的虚拟地址所在虚拟页</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">vma_struct</span> *<span class="title">vma</span> =</span> find_vma(mm, addr);</span><br><span class="line"></span><br><span class="line"> pgfault_num++;</span><br><span class="line"> <span class="comment">// 如果当前访问的虚拟地址不在已经分配的虚拟页中</span></span><br><span class="line"> <span class="keyword">if</span> (vma == <span class="literal">NULL</span> || vma->vm_start > addr) {</span><br><span class="line"> cprintf(<span class="string">"not valid addr %x, and can not find it in vma\n"</span>, addr);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 检测错误代码。这里的检测不涉及特权判断。</span></span><br><span class="line"> <span class="keyword">switch</span> (error_code & <span class="number">3</span>) {</span><br><span class="line"> <span class="keyword">default</span>:</span><br><span class="line"> <span class="comment">// 写,同时存在物理页,则写时复制</span></span><br><span class="line"> <span class="comment">// 需要注意的是,default会执行case2的代码,也就是判断是否有写权限。</span></span><br><span class="line"> <span class="keyword">case</span> <span class="number">2</span>:</span><br><span class="line"> <span class="comment">// 读,同时不存在物理页</span></span><br><span class="line"> <span class="comment">// 同时如果当前操作是写入,但所在虚拟页不允许写入</span></span><br><span class="line"> <span class="keyword">if</span> (!(vma->vm_flags & VM_WRITE)) {</span><br><span class="line"> cprintf(<span class="string">"do_pgfault failed: error code flag = write AND not present, but the addr's vma cannot write\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> <span class="keyword">case</span> <span class="number">1</span>: <span class="comment">/* error code flag : (W/R=0, P=1): read, present */</span></span><br><span class="line"> <span class="comment">// 读,同时存在物理页。那就不可能会调用page fault,肯定哪里有问题,直接failed</span></span><br><span class="line"> cprintf(<span class="string">"do_pgfault failed: error code flag = read AND present\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> <span class="keyword">case</span> <span class="number">0</span>: <span class="comment">/* error code flag : (W/R=0, P=0): read, not present */</span></span><br><span class="line"> <span class="comment">// 写,同时不存在物理页面</span></span><br><span class="line"> <span class="comment">// 如果当前操作是读取,但所在虚拟页不允许读取或执行</span></span><br><span class="line"> <span class="keyword">if</span> (!(vma->vm_flags & (VM_READ | VM_EXEC))) {</span><br><span class="line"> cprintf(<span class="string">"do_pgfault failed: error code flag = read AND not present, but the addr's vma cannot read or exec\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 设置页表条目所对应的权限</span></span><br><span class="line"> <span class="keyword">uint32_t</span> perm = PTE_U;</span><br><span class="line"> <span class="keyword">if</span> (vma->vm_flags & VM_WRITE) {</span><br><span class="line"> perm |= PTE_W;</span><br><span class="line"> }</span><br><span class="line"> addr = ROUNDDOWN(addr, PGSIZE);</span><br><span class="line"> ret = -E_NO_MEM;</span><br><span class="line"> <span class="keyword">pte_t</span> *ptep=<span class="literal">NULL</span>;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* LAB3 EXERCISE 1: YOUR CODE */</span></span><br><span class="line"> <span class="comment">// 查找当前虚拟地址所对应的页表项</span></span><br><span class="line"> <span class="comment">// create 是 1,查找对应的页表项,页目录下找不到该页表则新建一个页表</span></span><br><span class="line"> <span class="comment">// 返回该页表的地址,地址为空说明找不到页表地址,直接返回failed</span></span><br><span class="line"> <span class="keyword">if</span> ((ptep = get_pte(mm->pgdir, addr, <span class="number">1</span>)) == <span class="literal">NULL</span>) {</span><br><span class="line"> cprintf(<span class="string">"get_pte in do_pgfault failed\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 如果这个页表项(用了*说明取了页表项中的内容)为0(啥没有)说明所对应的物理页不存在,则新建一个页对应页表</span></span><br><span class="line"> <span class="keyword">if</span> (*ptep == <span class="number">0</span>) {</span><br><span class="line"> <span class="comment">// 分配一块物理页,并设置页表项</span></span><br><span class="line"> <span class="keyword">if</span> (pgdir_alloc_page(mm->pgdir, addr, perm) == <span class="literal">NULL</span>) {</span><br><span class="line"> cprintf(<span class="string">"pgdir_alloc_page in do_pgfault failed\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span> {</span><br><span class="line"> <span class="comment">/* LAB3 EXERCISE 2: YOUR CODE */</span></span><br><span class="line"> <span class="comment">// 如果这个页表项所对应的物理页存在,但不在内存中</span></span><br><span class="line"> <span class="comment">// 如果swap已经初始化完成</span></span><br><span class="line"> <span class="keyword">if</span>(swap_init_ok) {</span><br><span class="line"> struct Page *page=<span class="literal">NULL</span>;</span><br><span class="line"> <span class="comment">// 将目标数据加载到某块新的物理页中。</span></span><br><span class="line"> <span class="comment">// 该物理页可能是尚未分配的物理页,也可能是从别的已分配物理页中取的</span></span><br><span class="line"> <span class="keyword">if</span> ((ret = swap_in(mm, addr, &page)) != <span class="number">0</span>) {</span><br><span class="line"> cprintf(<span class="string">"swap_in in do_pgfault failed\n"</span>);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 将该物理页与对应的虚拟地址关联,同时设置页表。</span></span><br><span class="line"> page_insert(mm->pgdir, page, addr, perm);</span><br><span class="line"> <span class="comment">// 当前缺失的页已经加载回内存中,所以设置当前页为可swap。</span></span><br><span class="line"> swap_map_swappable(mm, addr, page, <span class="number">1</span>);</span><br><span class="line"> page->pra_vaddr = addr;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span> {</span><br><span class="line"> cprintf(<span class="string">"no swap_init_ok but ptep is %x, failed\n"</span>,*ptep);</span><br><span class="line"> <span class="keyword">goto</span> failed;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> ret = <span class="number">0</span>;</span><br><span class="line">failed:</span><br><span class="line"> <span class="keyword">return</span> ret;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>所以只要在 vma 里面设置的地址都是被 os 认定为有效的,找不到该地址会新建一个页,该页判断被换出会进行换入操作。</p>
<p>swap_in 函数首先向 OS 申请一个空闲页面,然后调用 swapfs_read 尝试将其从 swap 分区中读出。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">int</span></span><br><span class="line">swap_in(struct mm_struct *mm, <span class="keyword">uintptr_t</span> addr, struct Page **ptr_result)</span><br><span class="line">{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">Page</span> *<span class="title">result</span> =</span> alloc_page();</span><br><span class="line"> <span class="keyword">pte_t</span> *ptep = get_pte(mm->pgdir, addr, <span class="number">0</span>);</span><br><span class="line"> <span class="keyword">int</span> r;</span><br><span class="line"> <span class="comment">//*ptep = pa | PTE_P | perm;</span></span><br><span class="line"> <span class="comment">//这时候PTE_P无效,pa与磁盘扇区有映射关系</span></span><br><span class="line"> <span class="keyword">if</span> ((r = swapfs_read((*ptep), result)) != <span class="number">0</span>)</span><br><span class="line"> assert(r!=<span class="number">0</span>);</span><br><span class="line"> *ptr_result=result;</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h2 id="SWAP"><a href="#SWAP" class="headerlink" title="SWAP"></a>SWAP</h2><p>虚存中的页与硬盘上的扇区之间的映射关系</p>
<p>如果一个页被置换到了硬盘上,那操作系统如何能简捷来表示这种情况呢?在 ucore 的设计上,充分利用了页表中的 PTE 来表示这种情况:当一个 PTE 用来描述一般意义上的物理页时,显然它应该维护各种权限和映射关系,以及应该有 PTE_P 标记;但当它用来描述一个被置换出去的物理页时,它被用来维护该物理页与 swap 磁盘上扇区的映射关系,并且该 PTE 不应该由 MMU 将它解释成物理页映射(即没有 PTE_P 标记),与此同时对应的权限则交由 mm_struct 来维护,当对位于该页的内存地址进行访问的时候,必然导致 page fault,然后 ucore 能够根据 PTE 描述的 swap 项将相应的物理页重新建立起来,并根据虚存所描述的权限重新设置好 PTE 使得内存访问能够继续正常进行。</p>
<p>如果一个页(4KB/页)被置换到了硬盘某 8 个扇区(0.5KB/扇区),该 PTE 的最低位–present 位应该为 0 (即 PTE_P 标记为空,表示虚实地址映射关系不存在),接下来的 7 位暂时保留,可以用作各种扩展;而包括原来高 20 位页帧号的高 24 位数据,恰好可以用来表示此页在硬盘上的起始扇区的位置(其从第几个扇区开始)。为了在页表项中区别 0 和 swap 分区的映射,将 swap 分区的一个 page 空出来不用,也就是说一个高 24 位不为 0,而最低位为 0 的 PTE 表示了一个放在硬盘上的页的起始扇区号(见 swap.h 中对 swap_entry_t 的描述):</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">swap_entry_t</span><br><span class="line">-------------------------</span><br><span class="line">| offset | reserved | 0 |</span><br><span class="line">-------------------------</span><br><span class="line">24 bits 7 bits 1 bit</span><br></pre></td></tr></table></figure>
<p>考虑到硬盘的最小访问单位是一个扇区,而一个扇区的大小为 512($2^8$)字节,所以需要 8 个连续扇区才能放置一个 4KB 的页。在 ucore 中,用了第二个 IDE 硬盘来保存被换出的扇区,根据实验三的输出信息</p>
<blockquote>
<p>实验三还创建了一个 swap.img</p>
</blockquote>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">SWAPIMG := <span class="variable">$(<span class="built_in">call</span> totarget,swap.img)</span></span><br><span class="line"></span><br><span class="line"><span class="variable">$(SWAPIMG)</span>:</span><br><span class="line"> <span class="variable">$(V)</span>dd if=/dev/zero of=<span class="variable">$@</span> bs=1024k count=128</span><br><span class="line"></span><br><span class="line"><span class="variable">$(<span class="built_in">call</span> create_target,swap.img)</span></span><br></pre></td></tr></table></figure>
<p>“ide 1: 262144(sectors), ‘QEMU HARDDISK’.”</p>
<p>我们可以知道实验三可以保存 262144/8=32768 个页,即 128MB 的内存空间。swap 分区的大小是 swapfs_init 里面根据磁盘驱动的接口计算出来的,目前 ucore 里面要求 swap 磁盘至少包含 1000 个 page,并且至多能使用 1<<24 个 page。</p>
<h2 id="FIFO"><a href="#FIFO" class="headerlink" title="FIFO"></a>FIFO</h2><p>Page 的数据结构里面又多了几个链接域,用于下面换入换出的时候来管理这些个 Page。因为 FIFO 需要维护现在正在使用的页,所以用来管理的结构是 Page 。因为这些页都是真的,没有被换出,被换出就得删页改页表。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// the control struct for a set of vma using the same PDT</span></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">mm_struct</span> {</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> mmap_list; <span class="comment">// 按照虚拟地址顺序双向连接的虚拟页链表</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">vma_struct</span> *<span class="title">mmap_cache</span>;</span> <span class="comment">// 当前使用的虚拟页地址,该成员加速页索引速度。</span></span><br><span class="line"> <span class="keyword">pde_t</span> *pgdir; <span class="comment">// 虚拟页对应的PDT</span></span><br><span class="line"> <span class="keyword">int</span> map_count; <span class="comment">// 虚拟页个数</span></span><br><span class="line"> <span class="keyword">void</span> *sm_priv; <span class="comment">// 用于指向swap manager的某个链表,在FIFO算法中,该双向链表用于将可交换的已分配物理页串起来</span></span><br><span class="line">};</span><br><span class="line"></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">Page</span> {</span></span><br><span class="line"> <span class="keyword">int</span> ref;</span><br><span class="line"> <span class="keyword">uint32_t</span> flags;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">int</span> property;</span><br><span class="line"> <span class="keyword">list_entry_t</span> page_link;</span><br><span class="line"> <span class="keyword">list_entry_t</span> pra_page_link; <span class="comment">// 用于连接上一个和下一个*可交换已分配*的物理页</span></span><br><span class="line"> <span class="keyword">uintptr_t</span> pra_vaddr; <span class="comment">// 用于保存该物理页所对应的虚拟地址。</span></span><br><span class="line">};</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">int</span></span><br><span class="line">_fifo_map_swappable(struct mm_struct *mm, <span class="keyword">uintptr_t</span> addr, struct Page *page, <span class="keyword">int</span> swap_in)</span><br><span class="line">{</span><br><span class="line"> <span class="keyword">list_entry_t</span> *head=(<span class="keyword">list_entry_t</span>*) mm->sm_priv;</span><br><span class="line"> <span class="keyword">list_entry_t</span> *entry=&(page->pra_page_link);</span><br><span class="line"></span><br><span class="line"> assert(entry != <span class="literal">NULL</span> && head != <span class="literal">NULL</span>);</span><br><span class="line"> <span class="comment">//record the page access situlation</span></span><br><span class="line"> <span class="comment">/*LAB3 EXERCISE 2: YOUR CODE*/</span></span><br><span class="line"> <span class="comment">//(1)link the most recent arrival page at the back of the pra_list_head qeueue.</span></span><br><span class="line"> list_add(head, entry);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">int</span></span><br><span class="line">_fifo_swap_out_victim(struct mm_struct *mm, struct Page ** ptr_page, <span class="keyword">int</span> in_tick)</span><br><span class="line">{</span><br><span class="line"> <span class="keyword">list_entry_t</span> *head=(<span class="keyword">list_entry_t</span>*) mm->sm_priv;</span><br><span class="line"> assert(head != <span class="literal">NULL</span>);</span><br><span class="line"> assert(in_tick==<span class="number">0</span>);</span><br><span class="line"> <span class="comment">/* Select the victim */</span></span><br><span class="line"> <span class="comment">/*LAB3 EXERCISE 2: YOUR CODE*/</span></span><br><span class="line"> <span class="comment">//(1) unlink the earliest arrival page in front of pra_list_head qeueue</span></span><br><span class="line"> <span class="comment">//(2) assign the value of *ptr_page to the addr of this page</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> *le = head->prev;</span><br><span class="line"> assert(head!=le);</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">Page</span> *<span class="title">p</span> =</span> le2page(le, pra_page_link);</span><br><span class="line"> list_del(le);</span><br><span class="line"> assert(p !=<span class="literal">NULL</span>);</span><br><span class="line"> *ptr_page = p;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab6/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab6/" class="post-title-link" itemprop="url">Lab6 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-01-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-01-24T10:47:34Z">2021-01-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:28:47" itemprop="dateModified" datetime="2021-04-23T00:28:47Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="Lab6"><a href="#Lab6" class="headerlink" title="Lab6"></a>Lab6</h1><h2 id="Round-Robin-调度算法"><a href="#Round-Robin-调度算法" class="headerlink" title="Round Robin 调度算法"></a>Round Robin 调度算法</h2><p>请理解并分析 sched_class 中各个函数指针的用法,并结合 Round Robin 调度算法描 ucore 的调度执行过程</p>
<h3 id="sched-class-中各个函数指针的用法"><a href="#sched-class-中各个函数指针的用法" class="headerlink" title="sched_class 中各个函数指针的用法"></a>sched_class 中各个函数指针的用法</h3><p>sched_class 的定义如下</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// The introduction of scheduling classes is borrrowed from Linux, and makes the</span></span><br><span class="line"><span class="comment">// core scheduler quite extensible. These classes (the scheduler modules) encapsulate</span></span><br><span class="line"><span class="comment">// the scheduling policies.</span></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">sched_class</span> {</span></span><br><span class="line"> <span class="comment">// the name of sched_class</span></span><br><span class="line"> <span class="keyword">const</span> <span class="keyword">char</span> *name;</span><br><span class="line"> <span class="comment">// Init the run queue</span></span><br><span class="line"> <span class="keyword">void</span> (*init)(struct run_queue *rq);</span><br><span class="line"> <span class="comment">// put the proc into runqueue, and this function must be called with rq_lock</span></span><br><span class="line"> <span class="keyword">void</span> (*enqueue)(struct run_queue *rq, struct proc_struct *proc);</span><br><span class="line"> <span class="comment">// get the proc out runqueue, and this function must be called with rq_lock</span></span><br><span class="line"> <span class="keyword">void</span> (*dequeue)(struct run_queue *rq, struct proc_struct *proc);</span><br><span class="line"> <span class="comment">// choose the next runnable task</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *(*<span class="title">pick_next</span>)(<span class="keyword">struct</span> <span class="title">run_queue</span> *<span class="title">rq</span>);</span></span><br><span class="line"> <span class="comment">// dealer of the time-tick</span></span><br><span class="line"> <span class="keyword">void</span> (*proc_tick)(struct run_queue *rq, struct proc_struct *proc);</span><br><span class="line"> <span class="comment">/* for SMP support in the future</span></span><br><span class="line"><span class="comment"> * load_balance</span></span><br><span class="line"><span class="comment"> * void (*load_balance)(struct rq* rq);</span></span><br><span class="line"><span class="comment"> * get some proc from this rq, used in load_balance,</span></span><br><span class="line"><span class="comment"> * return value is the num of gotten proc</span></span><br><span class="line"><span class="comment"> * int (*get_proc)(struct rq* rq, struct proc* procs_moved[]);</span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line">};</span><br></pre></td></tr></table></figure>
<p>其中,<code>const char *name</code>指向了当前调度算法的名称字符串</p>
<p><code>void (*init)(struct run_queue *rq)</code>用于初始化传入的就绪队列。RR 算法中只初始化了对应 run_queue 的 run_list 成员。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">RR_init(struct run_queue *rq) {</span><br><span class="line"> list_init(&(rq->run_list));</span><br><span class="line"> rq->proc_num = <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>void (*enqueue)(struct run_queue *rq, struct proc_struct *proc)</code>用于将某个进程添加进传入的队列中。RR 算法除了将进程添加进队列中,还重置了相关的时间片。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">RR_enqueue(struct run_queue *rq, struct proc_struct *proc) {</span><br><span class="line"> assert(list_empty(&(proc->run_link)));</span><br><span class="line"> list_add_before(&(rq->run_list), &(proc->run_link));</span><br><span class="line"> <span class="keyword">if</span> (proc->time_slice == <span class="number">0</span> || proc->time_slice > rq->max_time_slice) {</span><br><span class="line"> proc->time_slice = rq->max_time_slice;</span><br><span class="line"> }</span><br><span class="line"> proc->rq = rq;</span><br><span class="line"> rq->proc_num ++;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>void (*dequeue)(struct run_queue *rq, struct proc_struct *proc)</code>用于将某个进程从传入的队列中移除。以下是 RR 算法的实现</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">RR_dequeue(struct run_queue *rq, struct proc_struct *proc) {</span><br><span class="line"> assert(!list_empty(&(proc->run_link)) && proc->rq == rq);</span><br><span class="line"> list_del_init(&(proc->run_link));</span><br><span class="line"> rq->proc_num --;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>struct proc_struct *(*pick_next)(struct run_queue *rq)</code>用于在传入的就绪队列中选择出一个最适合运行的进程(选择进程但不将从队列中移除)。在 RR 算法中每次都只选择队列最前面那个进程。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *</span></span><br><span class="line"><span class="class"><span class="title">RR_pick_next</span>(<span class="keyword">struct</span> <span class="title">run_queue</span> *<span class="title">rq</span>) {</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> *le = list_next(&(rq->run_list));</span><br><span class="line"> <span class="keyword">if</span> (le != &(rq->run_list)) {</span><br><span class="line"> <span class="keyword">return</span> le2proc(le, run_link);</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> <span class="literal">NULL</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>void (*proc_tick)(struct run_queue *rq, struct proc_struct *proc)</code>。该函数会在时间中断处理例程中被调用,以减小当前运行进程的剩余时间片。若时间片耗尽,则设置当前进程的 need_resched 为 1。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">RR_proc_tick(struct run_queue *rq, struct proc_struct *proc) {</span><br><span class="line"> <span class="keyword">if</span> (proc->time_slice > <span class="number">0</span>) {</span><br><span class="line"> proc->time_slice --;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> (proc->time_slice == <span class="number">0</span>) {</span><br><span class="line"> proc->need_resched = <span class="number">1</span>;</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>结合 Round Robin 调度算法描 uCore 的调度执行过程</p>
<p>首先,uCore 调用 sched_init 函数用于初始化相关的就绪队列。</p>
<p>之后在 proc_init 函数中,建立第一个内核进程,并将其添加至就绪队列中。</p>
<p>当所有的初始化完成后,uCore 执行 cpu_idle 函数,并在其内部的 schedule 函数中,调用 sched_class_enqueue 将当前进程添加进就绪队列中(因为当前进程要被切换出 CPU 了)<br>然后,调用 sched_class_pick_next 获取就绪队列中可被轮换至 CPU 的进程。如果存在可用的进程,则调用 sched_class_dequeue 函数,将该进程移出就绪队列,并在之后执行 proc_run 函数进行进程上下文切换。</p>
<p>需要注意的是,每次时间中断都会调用函数 sched_class_proc_tick。该函数会减少当前运行进程的剩余时间片。如果时间片减小为 0,则设置 need_resched 为 1,并在时间中断例程完成后,在 trap 函数的剩余代码中进行进程切换。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">trap</span><span class="params">(struct trapframe *tf)</span> </span>{</span><br><span class="line"> <span class="keyword">if</span> (current == <span class="literal">NULL</span>)</span><br><span class="line"> trap_dispatch(tf);</span><br><span class="line"> <span class="keyword">else</span> {</span><br><span class="line"> struct trapframe *otf = current->tf;</span><br><span class="line"> current->tf = tf;</span><br><span class="line"> <span class="keyword">bool</span> in_kernel = trap_in_kernel(tf);</span><br><span class="line"> <span class="comment">// 执行对应的中断处理例程</span></span><br><span class="line"> trap_dispatch(tf);</span><br><span class="line"> <span class="comment">// 恢复对应的trapframe</span></span><br><span class="line"> current->tf = otf;</span><br><span class="line"> <span class="comment">// 如果当前中断的是用户进程</span></span><br><span class="line"> <span class="comment">// 注意这里体现出用户进程的可抢占性</span></span><br><span class="line"> <span class="keyword">if</span> (!in_kernel) {</span><br><span class="line"> <span class="keyword">if</span> (current->flags & PF_EXITING)</span><br><span class="line"> do_exit(-E_KILLED);</span><br><span class="line"> <span class="comment">// 如果在中断处理例程中设置need_resched为1,则在此处切换进程</span></span><br><span class="line"> <span class="keyword">if</span> (current->need_resched)</span><br><span class="line"> schedule();</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab7/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab7/" class="post-title-link" itemprop="url">Lab7 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-01-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-01-24T10:47:34Z">2021-01-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:29:15" itemprop="dateModified" datetime="2021-04-23T00:29:15Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="Lab7"><a href="#Lab7" class="headerlink" title="Lab7"></a>Lab7</h1><h2 id="定时器-Timer"><a href="#定时器-Timer" class="headerlink" title="定时器 Timer"></a>定时器 Timer</h2><p>timer_t 结构用于存储一个定时器所需要的相关数据,包括倒计时时间以及所绑定的进程。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">list_entry_t</span> timer_list;</span><br><span class="line"></span><br><span class="line"><span class="keyword">typedef</span> <span class="class"><span class="keyword">struct</span> {</span></span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">int</span> expires; <span class="comment">//the expire time</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">proc</span>;</span> <span class="comment">//the proc wait in this timer. If the expire time is end, then this proc will be scheduled</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> timer_link; <span class="comment">//the timer list</span></span><br><span class="line">} <span class="keyword">timer_t</span>;</span><br></pre></td></tr></table></figure>
<p>add_timer 用于将某个 timer 添加进 timer 列表中。</p>
<p>处于性能考虑,每个新添加的 timer 都会按照其 expires 属性的大小排列,同时减去上一个 timer 的 expires 属性。一个例子:</p>
<p>两个尚未添加进列表中的 timer:</p>
<p>timer1->expires = 20;<br>timer2->expires = 38;</p>
<p>将这两个 timer 添加进列表后:(注意 timer2 的 expires)</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">+------------+ +----------------------+ +--------------------------+</span><br><span class="line">| timer_list | <---> | timer1->expires = 20 | <---> | timer2->expires = 18 !!! |</span><br><span class="line">+------------+ +----------------------+ +--------------------------+</span><br></pre></td></tr></table></figure>
<p>这样,在更新 timer_list 中的所有 timer 的 expires 时,只需递减链首的第一个 timer 的 expire,即可间接达到所有 timer 的 expires 减一的目的。</p>
<p>该函数源代码如下</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// add timer to timer_list</span></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">add_timer</span><span class="params">(<span class="keyword">timer_t</span> *timer)</span> </span>{</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">list_entry_t</span> *le = list_next(&timer_list);</span><br><span class="line"> <span class="comment">// 减去每个遍历到的timer的expires</span></span><br><span class="line"> <span class="keyword">while</span> (le != &timer_list) {</span><br><span class="line"> <span class="keyword">timer_t</span> *next = le2timer(le, timer_link);</span><br><span class="line"> <span class="keyword">if</span> (timer->expires < next->expires) {</span><br><span class="line"> next->expires -= timer->expires;</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> }</span><br><span class="line"> timer->expires -= next->expires;</span><br><span class="line"> le = list_next(le);</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 将当前timer添加至列表中</span></span><br><span class="line"> list_add_before(le, &(timer->timer_link));</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>run_timer_list 函数用于更新定时器的时间,并更新当前进程的运行时间片。如果当前定时器的剩余时间结束,则唤醒某个处于 WT_INTERRUPTED 等待状态的进程。有一点在上个函数中提到过:递减 timer_list 中每个 timer 的 expires 时,只递减链头第一个 timer 的 expires。该函数的源代码如下</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// call scheduler to update tick related info, and check the timer is expired? If expired, then wakup proc</span></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">run_timer_list</span><span class="params">(<span class="keyword">void</span>)</span> </span>{</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">list_entry_t</span> *le = list_next(&timer_list);</span><br><span class="line"> <span class="keyword">if</span> (le != &timer_list) {</span><br><span class="line"> <span class="keyword">timer_t</span> *timer = le2timer(le, timer_link);</span><br><span class="line"> <span class="comment">// 只递减链头timer的expires</span></span><br><span class="line"> timer->expires --;</span><br><span class="line"> <span class="keyword">while</span> (timer->expires == <span class="number">0</span>) {</span><br><span class="line"> le = list_next(le);</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">proc</span> =</span> timer->proc;</span><br><span class="line"> wakeup_proc(proc);</span><br><span class="line"> del_timer(timer);</span><br><span class="line"> <span class="keyword">if</span> (le == &timer_list)</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> timer = le2timer(le, timer_link);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 当前进程执行时间减 1</span></span><br><span class="line"> sched_class_proc_tick(current);</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>将 timer 从 timer_list 中删除的操作比较简单:设置好当前待移除 timer 的下一个 timer->expires,并将当前 timer 从链表中移除即可。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// del timer from timer_list</span></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">del_timer</span><span class="params">(<span class="keyword">timer_t</span> *timer)</span> </span>{</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">if</span> (!list_empty(&(timer->timer_link))) {</span><br><span class="line"> <span class="keyword">if</span> (timer->expires != <span class="number">0</span>) {</span><br><span class="line"> <span class="keyword">list_entry_t</span> *le = list_next(&(timer->timer_link));</span><br><span class="line"> <span class="keyword">if</span> (le != &timer_list) {</span><br><span class="line"> <span class="keyword">timer_t</span> *next = le2timer(le, timer_link);</span><br><span class="line"> next->expires += timer->expires;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> list_del_init(&(timer->timer_link));</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>一个简单的例子,do_sleep 函数</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">do_sleep</span><span class="params">(<span class="keyword">unsigned</span> <span class="keyword">int</span> time)</span> </span>{</span><br><span class="line"> <span class="keyword">if</span> (time == <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="comment">// 设置定时器</span></span><br><span class="line"> <span class="keyword">timer_t</span> __timer, *timer = timer_init(&__timer, current, time);</span><br><span class="line"> current->state = PROC_SLEEPING;</span><br><span class="line"> current->wait_state = WT_TIMER;</span><br><span class="line"> <span class="comment">// 启用定时器</span></span><br><span class="line"> add_timer(timer);</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"> <span class="comment">// 当前进程放弃CPU资源</span></span><br><span class="line"> schedule();</span><br><span class="line"> <span class="comment">// 时间到点了,删除当前timer</span></span><br><span class="line"> del_timer(timer);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>定时器的用处:定时器可以帮助操作系统在经过一段特定时间后执行一些特殊操作,例如唤醒执行线程。可以说,正是有了定时器,操作系统才有了时间这个概念。</p>
<h2 id="内核信号量实现"><a href="#内核信号量实现" class="headerlink" title="内核信号量实现"></a>内核信号量实现</h2><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">int</span> pid = kernel_thread(init_main, <span class="literal">NULL</span>, <span class="number">0</span>);</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">int</span></span><br><span class="line">init_main(<span class="keyword">void</span> *arg) {</span><br><span class="line"> ...</span><br><span class="line"><span class="function"><span class="keyword">extern</span> <span class="keyword">void</span> <span class="title">check_sync</span><span class="params">(<span class="keyword">void</span>)</span></span>;</span><br><span class="line"> check_sync(); <span class="comment">// check philosopher sync problem</span></span><br><span class="line"></span><br><span class="line"> <span class="keyword">while</span> (do_wait(<span class="number">0</span>, <span class="literal">NULL</span>) == <span class="number">0</span>)</span><br><span class="line"> schedule();</span><br><span class="line"> ...</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">check_sync</span><span class="params">(<span class="keyword">void</span>)</span></span>{</span><br><span class="line"> <span class="keyword">int</span> i;</span><br><span class="line"> <span class="comment">//check semaphore</span></span><br><span class="line"> sem_init(&mutex, <span class="number">1</span>);</span><br><span class="line"> <span class="keyword">for</span>(i=<span class="number">0</span>;i<N;i++){</span><br><span class="line"> sem_init(&s[i], <span class="number">0</span>);</span><br><span class="line"> <span class="keyword">int</span> pid = kernel_thread(philosopher_using_semaphore, (<span class="keyword">void</span> *)i, <span class="number">0</span>);</span><br><span class="line"> philosopher_proc_sema[i] = find_proc(pid);</span><br><span class="line"> set_proc_name(philosopher_proc_sema[i], <span class="string">"philosopher_sema_proc"</span>);</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>在第一个内核进程 idleproc 调用 kernel_thread 派生出第二个进程 init 执行 init_main 函数,init_main 函数调用 check_sync 创建了五个哲学家进程,此时进程控制权还在 init 手中,之后 init 执行 do_wait,主动放弃时间片,执行 schedule(),五个哲学家开始活动。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">philosopher_using_semaphore</span><span class="params">(<span class="keyword">void</span> * arg)</span> <span class="comment">/* i:哲学家号码,从0到N-1 */</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="keyword">int</span> i, iter=<span class="number">0</span>;</span><br><span class="line"> i=(<span class="keyword">int</span>)arg;</span><br><span class="line"> cprintf(<span class="string">"I am No.%d philosopher_sema\n"</span>,i);</span><br><span class="line"> <span class="keyword">while</span>(iter++<TIMES)</span><br><span class="line"> { <span class="comment">/* 无限循环 */</span></span><br><span class="line"> cprintf(<span class="string">"Iter %d, No.%d philosopher_sema is thinking\n"</span>,iter,i); <span class="comment">/* 哲学家正在思考 */</span></span><br><span class="line"> do_sleep(SLEEP_TIME);</span><br><span class="line"> phi_take_forks_sema(i);</span><br><span class="line"> <span class="comment">/* 需要两只叉子,或者阻塞 */</span></span><br><span class="line"> cprintf(<span class="string">"Iter %d, No.%d philosopher_sema is eating\n"</span>,iter,i); <span class="comment">/* 进餐 */</span></span><br><span class="line"> do_sleep(SLEEP_TIME);</span><br><span class="line"> phi_put_forks_sema(i);</span><br><span class="line"> <span class="comment">/* 把两把叉子同时放回桌子 */</span></span><br><span class="line"> }</span><br><span class="line"> cprintf(<span class="string">"No.%d philosopher_sema quit\n"</span>,i);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">phi_take_forks_sema</span><span class="params">(<span class="keyword">int</span> i)</span> <span class="comment">/* i:哲学家号码从0到N-1 */</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> down(&mutex); <span class="comment">/* 进入临界区 */</span></span><br><span class="line"> state_sema[i]=HUNGRY; <span class="comment">/* 记录下哲学家i饥饿的事实 */</span></span><br><span class="line"> phi_test_sema(i); <span class="comment">/* 试图得到两只叉子 */</span></span><br><span class="line"> up(&mutex); <span class="comment">/* 离开临界区 */</span></span><br><span class="line"> down(&s[i]); <span class="comment">/* 如果得不到叉子就阻塞 */</span></span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">phi_put_forks_sema</span><span class="params">(<span class="keyword">int</span> i)</span> <span class="comment">/* i:哲学家号码从0到N-1 */</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> down(&mutex); <span class="comment">/* 进入临界区 */</span></span><br><span class="line"> state_sema[i]=THINKING; <span class="comment">/* 哲学家进餐结束 */</span></span><br><span class="line"> phi_test_sema(LEFT); <span class="comment">/* 看一下左邻居现在是否能进餐 */</span></span><br><span class="line"> phi_test_sema(RIGHT); <span class="comment">/* 看一下右邻居现在是否能进餐 */</span></span><br><span class="line"> up(&mutex); <span class="comment">/* 离开临界区 */</span></span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>上面是哲学家就餐问题的解,但本文主要聚焦于内核信号量的实现,即 sem_init,up,down 这三个函数的实现。</p>
<h3 id="sem-init"><a href="#sem-init" class="headerlink" title="sem_init"></a>sem_init</h3><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">typedef</span> <span class="class"><span class="keyword">struct</span> {</span></span><br><span class="line"> <span class="keyword">int</span> value;</span><br><span class="line"> <span class="keyword">wait_queue_t</span> wait_queue;</span><br><span class="line">} <span class="keyword">semaphore_t</span>;</span><br><span class="line"></span><br><span class="line"><span class="keyword">void</span></span><br><span class="line">sem_init(<span class="keyword">semaphore_t</span> *sem, <span class="keyword">int</span> value) {</span><br><span class="line"> sem->value = value;</span><br><span class="line"> wait_queue_init(&(sem->wait_queue));</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>sem_init 函数初始化了一个等待队列以及信号量的初值。</p>
<h3 id="up"><a href="#up" class="headerlink" title="up"></a>up</h3><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">void</span></span><br><span class="line">up(<span class="keyword">semaphore_t</span> *sem) {</span><br><span class="line"> __up(sem, WT_KSEM);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> __noinline <span class="keyword">void</span> __up(<span class="keyword">semaphore_t</span> *sem, <span class="keyword">uint32_t</span> wait_state) {</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="keyword">wait_t</span> *wait;</span><br><span class="line"> <span class="comment">// 如果没有进程需要唤醒,增加信号量的值</span></span><br><span class="line"> <span class="keyword">if</span> ((wait = wait_queue_first(&(sem->wait_queue))) == <span class="literal">NULL</span>)</span><br><span class="line"> sem->value ++;</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> <span class="comment">// 否则唤醒在该信号量等待队列中的第一个进程</span></span><br><span class="line"> wakeup_wait(&(sem->wait_queue), wait, wait_state, <span class="number">1</span>);</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">void</span></span><br><span class="line">wakeup_wait(<span class="keyword">wait_queue_t</span> *<span class="built_in">queue</span>, <span class="keyword">wait_t</span> *wait, <span class="keyword">uint32_t</span> wakeup_flags, <span class="keyword">bool</span> del) {</span><br><span class="line"> <span class="comment">// 在信号量等待队列删除该进程</span></span><br><span class="line"> <span class="keyword">if</span> (del)</span><br><span class="line"> wait_queue_del(<span class="built_in">queue</span>, wait);</span><br><span class="line"> wait->wakeup_flags = wakeup_flags;</span><br><span class="line"> <span class="comment">// 唤醒进程</span></span><br><span class="line"> wakeup_proc(wait->proc);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">void</span></span><br><span class="line">wakeup_proc(struct proc_struct *proc) {</span><br><span class="line"> assert(proc->state != PROC_ZOMBIE);</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="comment">// 设置进程状态为就绪</span></span><br><span class="line"> <span class="keyword">if</span> (proc->state != PROC_RUNNABLE) {</span><br><span class="line"> proc->state = PROC_RUNNABLE;</span><br><span class="line"> proc->wait_state = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">if</span> (proc != current) {</span><br><span class="line"> <span class="comment">// 将进程放入调度队列</span></span><br><span class="line"> sched_class_enqueue(proc);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span> {</span><br><span class="line"> warn(<span class="string">"wakeup runnable process.\n"</span>);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h3 id="down"><a href="#down" class="headerlink" title="down"></a>down</h3><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">void</span></span><br><span class="line">down(<span class="keyword">semaphore_t</span> *sem) {</span><br><span class="line"> <span class="keyword">uint32_t</span> flags = __down(sem, WT_KSEM);</span><br><span class="line"> assert(flags == <span class="number">0</span>);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> __noinline <span class="keyword">uint32_t</span> __down(<span class="keyword">semaphore_t</span> *sem, <span class="keyword">uint32_t</span> wait_state) {</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"></span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> <span class="comment">// 如果信号量大于 0</span></span><br><span class="line"> <span class="keyword">if</span> (sem->value > <span class="number">0</span>) {</span><br><span class="line"> <span class="comment">// 减完信号量直接返回</span></span><br><span class="line"> sem->value --;</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">wait_t</span> __wait, *wait = &__wait;</span><br><span class="line"> <span class="comment">// 将当前的进程放入等待队列</span></span><br><span class="line"> wait_current_set(&(sem->wait_queue), wait, wait_state);</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"></span><br><span class="line"> <span class="comment">// 切到其他进程,放弃时间片</span></span><br><span class="line"> schedule();</span><br><span class="line"></span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> <span class="comment">//恢复执行后将当前进程从wait队列删除</span></span><br><span class="line"> wait_current_del(&(sem->wait_queue), wait);</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (wait->wakeup_flags != wait_state) {</span><br><span class="line"> <span class="keyword">return</span> wait->wakeup_flags;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">void</span></span><br><span class="line">wait_current_set(<span class="keyword">wait_queue_t</span> *<span class="built_in">queue</span>, <span class="keyword">wait_t</span> *wait, <span class="keyword">uint32_t</span> wait_state) {</span><br><span class="line"> <span class="comment">// 用当前的进程建一个 wait 结构体放入信号量等待队列</span></span><br><span class="line"> wait_init(wait, current);</span><br><span class="line"> <span class="comment">// 设置当前的进程状态为睡眠状态,后面调用 schedule 放弃时间片</span></span><br><span class="line"> current->state = PROC_SLEEPING;</span><br><span class="line"> current->wait_state = wait_state;</span><br><span class="line"> wait_queue_add(<span class="built_in">queue</span>, wait);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h2 id="管程"><a href="#管程" class="headerlink" title="管程"></a>管程</h2><p><a target="_blank" rel="noopener" href="https://kiprey.github.io/2020/09/uCore-7/#%E7%BB%83%E4%B9%A02">管程</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab4/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab4/" class="post-title-link" itemprop="url">Lab4 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-01-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-01-24T10:47:34Z">2021-01-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:27:48" itemprop="dateModified" datetime="2021-04-23T00:27:48Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="Lab4"><a href="#Lab4" class="headerlink" title="Lab4"></a>Lab4</h1><h2 id="数据结构"><a href="#数据结构" class="headerlink" title="数据结构"></a>数据结构</h2><h3 id="进程控制块-PCB"><a href="#进程控制块-PCB" class="headerlink" title="进程控制块 PCB"></a>进程控制块 PCB</h3><p>在 ucore 中,并不显式的区分进程与线程,都使用同样的数据结构 proc_struct 进程/线程管理块进行管理。当不同的线程控制块对应的页表(cr3)相同时,ucore 认为是同一进程下的不同线程。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">//由于进程数量可能较大,倘若从头向后遍历查找符合某个状态的PCB,则效率会十分低下</span></span><br><span class="line"><span class="comment">//因此使用了哈希表作为遍历所用的数据结构。</span></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">list_entry_t</span> hash_list[HASH_LIST_SIZE];</span><br><span class="line"></span><br><span class="line"><span class="comment">//进程调度时用这个链表顺着索引</span></span><br><span class="line"><span class="keyword">list_entry_t</span> proc_list;</span><br><span class="line"></span><br><span class="line"><span class="class"><span class="keyword">enum</span> <span class="title">proc_state</span> {</span></span><br><span class="line"> PROC_UNINIT = <span class="number">0</span>, <span class="comment">// 未初始化的 -- alloc_proc</span></span><br><span class="line"> PROC_SLEEPING, <span class="comment">// 等待状态 -- try_free_pages, do_wait, do_sleep</span></span><br><span class="line"> PROC_RUNNABLE, <span class="comment">// 就绪/运行状态 -- proc_init, wakeup_proc,</span></span><br><span class="line"> PROC_ZOMBIE, <span class="comment">// 僵死状态 -- do_exit</span></span><br><span class="line">};</span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">context</span> {</span> <span class="comment">// 保存的上下文寄存器,注意没有eax寄存器和段寄存器</span></span><br><span class="line"> <span class="keyword">uint32_t</span> eip;</span><br><span class="line"> <span class="keyword">uint32_t</span> esp;</span><br><span class="line"> <span class="keyword">uint32_t</span> ebx;</span><br><span class="line"> <span class="keyword">uint32_t</span> ecx;</span><br><span class="line"> <span class="keyword">uint32_t</span> edx;</span><br><span class="line"> <span class="keyword">uint32_t</span> esi;</span><br><span class="line"> <span class="keyword">uint32_t</span> edi;</span><br><span class="line"> <span class="keyword">uint32_t</span> ebp;</span><br><span class="line">};</span><br><span class="line"></span><br><span class="line"><span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 进程控制块结构(ucore进程和线程都使用proc_struct进行管理)</span></span><br><span class="line"><span class="comment"> * */</span></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> {</span></span><br><span class="line"> <span class="class"><span class="keyword">enum</span> <span class="title">proc_state</span> <span class="title">state</span>;</span> <span class="comment">// 当前进程的状态</span></span><br><span class="line"> <span class="keyword">int</span> pid; <span class="comment">// 进程ID</span></span><br><span class="line"> <span class="keyword">int</span> runs; <span class="comment">// 当前进程被调度的次数</span></span><br><span class="line"> <span class="keyword">uintptr_t</span> kstack; <span class="comment">// 内核栈</span></span><br><span class="line"> <span class="keyword">volatile</span> <span class="keyword">bool</span> need_resched; <span class="comment">// 是否需要被调度</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">parent</span>;</span> <span class="comment">// 父进程ID</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">mm_struct</span> *<span class="title">mm</span>;</span> <span class="comment">// 当前进程所管理的虚拟内存页,包括其所属的页目录项PDT</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">context</span> <span class="title">context</span>;</span> <span class="comment">// 保存的上下文</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">trapframe</span> *<span class="title">tf</span>;</span> <span class="comment">// 中断所保存的上下文</span></span><br><span class="line"> <span class="keyword">uintptr_t</span> cr3; <span class="comment">// 页目录表的地址</span></span><br><span class="line"> <span class="keyword">uint32_t</span> flags; <span class="comment">// 当前进程的相关标志</span></span><br><span class="line"> <span class="keyword">char</span> name[PROC_NAME_LEN + <span class="number">1</span>]; <span class="comment">// 进程名称(可执行文件名)</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> list_link; <span class="comment">// 用于连接list 切进程用的那个</span></span><br><span class="line"> <span class="keyword">list_entry_t</span> hash_link; <span class="comment">// 用于连接hash list 链在hash表上的那个</span></span><br><span class="line">};</span><br></pre></td></tr></table></figure>
<h2 id="idleproc"><a href="#idleproc" class="headerlink" title="idleproc"></a>idleproc</h2><p>idleproc 作为 ucore 的第一个进程,其目的就是会执行 cpu_idle 函数,并从中调用 schedule 函数,准备开始调度进程。作为第一个内核进程,可以说在它之前 ucore 所有执行的内容是没有进程的概念的,但 idleproc 出现后 ucore 后面的初始化代码都是以 idleproc 进程的名义执行。</p>
<p>下面是对 ucore 进行的初始化操作。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// 分配一个proc_struct结构</span></span><br><span class="line"><span class="keyword">if</span> ((idleproc = alloc_proc()) == <span class="literal">NULL</span>)</span><br><span class="line"> panic(<span class="string">"cannot alloc idleproc.\n"</span>);</span><br><span class="line"><span class="comment">// 该空闲进程作为第一个进程,pid为0</span></span><br><span class="line">idleproc->pid = <span class="number">0</span>;</span><br><span class="line"><span class="comment">// 设置该空闲进程始终可运行</span></span><br><span class="line">idleproc->state = PROC_RUNNABLE;</span><br><span class="line"><span class="comment">// 设置空闲进程的内核栈</span></span><br><span class="line">idleproc->kstack = (<span class="keyword">uintptr_t</span>)bootstack;</span><br><span class="line"><span class="comment">// 设置该空闲进程为可调度</span></span><br><span class="line">idleproc->need_resched = <span class="number">1</span>;</span><br><span class="line">set_proc_name(idleproc, <span class="string">"idle"</span>);</span><br><span class="line">nr_process++;</span><br><span class="line"><span class="comment">// 设置当前运行的进程为该空闲进程</span></span><br><span class="line">current = idleproc;</span><br></pre></td></tr></table></figure>
<p>这段初始化将 ucore 在 entry.S 设置的新的栈赋给了 idleproc->kstack,ucore 一直到现在只用过两个栈,第一个是在启动块设置段表的时候顺手把栈改成 0x7c00 处,后面是在 entry.S 中声明了两页大小的内核栈,一直用到现在。这也表明了剩下的函数以 idleproc 的名义在执行,至于其 context 上下文,在 switch_to(from, to)会被换到其 PCB 的 context 中。</p>
<h2 id="TSS"><a href="#TSS" class="headerlink" title="TSS"></a>TSS</h2><p>TSS 是一个特殊的段。在 Linux 中,CPU 从系统态切换到用户态时会用到 TSS 里面的 ss0 和 esp0。每个 CPU 只维护一个 TSS。TR 寄存器指向这个 TSS,切换时里面的 ss0 和 esp0 会有改变。相应有一个 TSS 段放在 GDT 中,是 GDT 的一个表项。</p>
<p>在 CPU 的中断被触发的时候,CPU 会通过 TR 寄存器中的值找到位于 GDT 表中的 TSS 段,该段指向了一个 TSS 结构体,在这个结构体中 CPU 取出里面的 ss0 和 esp0,将新的栈地址设置为 ss0 和 esp0,并将之前旧的 ss,espeflags,cs,eip 全部压到新的栈里面去。所以观察一个栈帧的结构可以发现。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">trapframe</span> {</span></span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">pushregs</span> <span class="title">tf_regs</span>;</span> <span class="comment">//那大堆push后的pushal压进去的</span></span><br><span class="line"> <span class="keyword">uint16_t</span> tf_gs; <span class="comment">//跳转到__alltraps后那一大堆push压进去的</span></span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding0;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_fs;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding1;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_es;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding2;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_ds;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding3;</span><br><span class="line"> <span class="keyword">uint32_t</span> tf_trapno;</span><br><span class="line"> <span class="comment">/* below here defined by x86 hardware */</span>这些东西是CPU切换到内核栈的时候自动压进去的</span><br><span class="line"> <span class="keyword">uint32_t</span> tf_err;</span><br><span class="line"> <span class="keyword">uintptr_t</span> tf_eip;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_cs;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding4;</span><br><span class="line"> <span class="keyword">uint32_t</span> tf_eflags;</span><br><span class="line"> <span class="comment">/* below here only when crossing rings, such as from user to kernel */</span></span><br><span class="line"> <span class="keyword">uintptr_t</span> tf_esp;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_ss;</span><br><span class="line"> <span class="keyword">uint16_t</span> tf_padding5;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>__alltraps 执行完成 pushal 后</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta"># load GD_KDATA into %ds and %es to set up data segments for kernel</span></span><br><span class="line">movl $GD_KDATA, %eax</span><br><span class="line">movw %ax, %ds 设置新的ds、es,旧的已经被压到trapframe里了</span><br><span class="line">movw %ax, %es</span><br><span class="line"></span><br><span class="line"><span class="meta"># push %esp to pass a pointer to the trapframe as an argument to trap()</span></span><br><span class="line">pushl %esp 现在的栈顶值esp指向了一个完整的栈帧,当作参数传给trap就能够根据栈帧进行中断处理</span><br><span class="line"></span><br><span class="line"><span class="meta"># call trap(tf), where tf=%esp</span></span><br><span class="line">call trap</span><br></pre></td></tr></table></figure>
<h3 id="TSS-的设置"><a href="#TSS-的设置" class="headerlink" title="TSS 的设置"></a>TSS 的设置</h3><p>首先是在最后一次设置 GDT 表的时候</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="class"><span class="keyword">struct</span> <span class="title">segdesc</span> <span class="title">gdt</span>[] =</span> {</span><br><span class="line"> SEG_NULL,</span><br><span class="line"> [SEG_KTEXT] = SEG(STA_X | STA_R, <span class="number">0x0</span>, <span class="number">0xFFFFFFFF</span>, DPL_KERNEL),</span><br><span class="line"> [SEG_KDATA] = SEG(STA_W, <span class="number">0x0</span>, <span class="number">0xFFFFFFFF</span>, DPL_KERNEL),</span><br><span class="line"> [SEG_UTEXT] = SEG(STA_X | STA_R, <span class="number">0x0</span>, <span class="number">0xFFFFFFFF</span>, DPL_USER),</span><br><span class="line"> [SEG_UDATA] = SEG(STA_W, <span class="number">0x0</span>, <span class="number">0xFFFFFFFF</span>, DPL_USER),</span><br><span class="line"> [SEG_TSS] = SEG_NULL,</span><br><span class="line">};</span><br><span class="line"></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">taskstate</span> {</span></span><br><span class="line"> .......</span><br><span class="line"> <span class="keyword">uintptr_t</span> ts_esp0; <span class="comment">// stack pointers and segment</span></span><br><span class="line"> <span class="keyword">uint16_t</span> ts_ss0; <span class="comment">// after an increase in</span></span><br><span class="line"> <span class="keyword">uint16_t</span> ts_iomb; <span class="comment">// i/o map base address</span></span><br><span class="line"> .......</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="class"><span class="keyword">struct</span> <span class="title">taskstate</span> <span class="title">ts</span> =</span> {<span class="number">0</span>};</span><br><span class="line"></span><br><span class="line"><span class="comment">/* gdt_init - initialize the default GDT and TSS */</span></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">gdt_init(<span class="keyword">void</span>) {</span><br><span class="line"> <span class="comment">// set boot kernel stack and default SS0</span></span><br><span class="line"> ts.ts_esp0 = (<span class="keyword">uintptr_t</span>)bootstacktop;</span><br><span class="line"> ts.ts_ss0 = KERNEL_DS;<span class="comment">//段基址寄存器 13位索引+1位T1+2位CPL</span></span><br><span class="line"></span><br><span class="line"> <span class="comment">// initialize the TSS filed of the gdt</span></span><br><span class="line"> gdt[SEG_TSS] = SEGTSS(STS_T32A, (<span class="keyword">uintptr_t</span>)&ts, <span class="keyword">sizeof</span>(ts), DPL_KERNEL);</span><br><span class="line"></span><br><span class="line"> <span class="comment">// reload all segment registers</span></span><br><span class="line"> lgdt(&gdt_pd);</span><br><span class="line"></span><br><span class="line"> <span class="comment">// load the TSS</span></span><br><span class="line"> <span class="comment">//一个gdt表项八个字节,所以GD_TSS = ((SEG_TSS) << 3) = SEG_TSS*8 TR寄存器里面存的是偏移</span></span><br><span class="line"> ltr(GD_TSS);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>首先是把当前的 TSS 设置为了之前 entry.S 申请的栈底(栈向低地址处生长,那段汇编先写的 bootstack 再写的 bootstacktop,所以 bootstacktop 是高地址,作为栈底),再将 TSS 中的 ss0 换成内核栈的段选择子,最后是将 GDT 的最后一项 TSS 填充完成,在 load TR 寄存器,完成当前的 TSS 设置(就当现在执行的是 idleproc 进程的话,TSS 里面确实也是存的这个进程的内核栈地址)。</p>
<p>然后再每次程序切换的时候</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// setup_kstack - alloc pages with size KSTACKPAGE as process kernel stack</span></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">int</span></span><br><span class="line">setup_kstack(struct proc_struct *proc)</span><br><span class="line">{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">Page</span> *<span class="title">page</span> =</span> alloc_pages(KSTACKPAGE);</span><br><span class="line"> <span class="keyword">if</span> (page != <span class="literal">NULL</span>)</span><br><span class="line"> {</span><br><span class="line"> proc->kstack = (<span class="keyword">uintptr_t</span>)page2kva(page);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> -E_NO_MEM;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">proc_run</span><span class="params">(struct proc_struct *proc)</span> </span>{</span><br><span class="line"> <span class="keyword">if</span> (proc != current) {</span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">prev</span> =</span> current, *next = proc;</span><br><span class="line"> <span class="comment">//设置内核栈地址与加载页目录项等这类关键操作不能被中断给打断。</span></span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> <span class="comment">// 设置当前执行的进程</span></span><br><span class="line"> current = proc;</span><br><span class="line"> <span class="comment">// 设置ring0的内核栈地址</span></span><br><span class="line"> load_esp0(next->kstack + KSTACKSIZE);</span><br><span class="line"> <span class="comment">// 加载页目录表</span></span><br><span class="line"> lcr3(next->cr3);</span><br><span class="line"> <span class="comment">// 切换上下文</span></span><br><span class="line"> switch_to(&(prev->context), &(next->context));</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"> }</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line">.globl switch_to</span><br><span class="line">switch_to: # switch_to(from, to)</span><br><span class="line"> # save from's registers</span><br><span class="line"> movl <span class="number">4</span>(%esp), %eax # 获取当前进程的context结构地址</span><br><span class="line"> popl <span class="number">0</span>(%eax) # 将eip保存至当前进程的context结构</span><br><span class="line"> movl %esp, <span class="number">4</span>(%eax) # 将esp保存至当前进程的context结构</span><br><span class="line"> movl %ebx, <span class="number">8</span>(%eax) # 将ebx保存至当前进程的context结构</span><br><span class="line"> movl %ecx, <span class="number">12</span>(%eax) # 将ecx保存至当前进程的context结构</span><br><span class="line"> movl %edx, <span class="number">16</span>(%eax) # 将edx保存至当前进程的context结构</span><br><span class="line"> movl %esi, <span class="number">20</span>(%eax) # 将esi保存至当前进程的context结构</span><br><span class="line"> movl %edi, <span class="number">24</span>(%eax) # 将edi保存至当前进程的context结构</span><br><span class="line"> movl %ebp, <span class="number">28</span>(%eax) # 将ebp保存至当前进程的context结构</span><br><span class="line"></span><br><span class="line"> # restore to's registers</span><br><span class="line"> movl <span class="number">4</span>(%esp), %eax # 获取下一个进程的context结构地址</span><br><span class="line"> # 需要注意的是,其地址不是<span class="number">8</span>(%esp),因为之前已经pop过一次栈。</span><br><span class="line"> movl <span class="number">28</span>(%eax), %ebp # 恢复ebp至下一个进程的context结构</span><br><span class="line"> movl <span class="number">24</span>(%eax), %edi # 恢复edi至下一个进程的context结构</span><br><span class="line"> movl <span class="number">20</span>(%eax), %esi # 恢复esi至下一个进程的context结构</span><br><span class="line"> movl <span class="number">16</span>(%eax), %edx # 恢复edx至下一个进程的context结构</span><br><span class="line"> movl <span class="number">12</span>(%eax), %ecx # 恢复ecx至下一个进程的context结构</span><br><span class="line"> movl <span class="number">8</span>(%eax), %ebx # 恢复ebx至下一个进程的context结构</span><br><span class="line"> movl <span class="number">4</span>(%eax), %esp # 恢复esp至下一个进程的context结构</span><br><span class="line"> pushl <span class="number">0</span>(%eax) # 插入下一个进程的eip,以便于ret到下个进程的代码位置。</span><br><span class="line"> ret</span><br></pre></td></tr></table></figure>
<p>把当前的内核栈地址换成当前执行进程的内核栈底,之所以加 KSTACKSIZE 和上面一样,proc->kstack 实际上存的是低位地址,加了 KSTACKSIZE 才是栈底,这样可以保证在每一个进程中断的时候,该进程对应的内核栈的栈底都是中断帧。</p>
<h2 id="第一个内核进程的创建"><a href="#第一个内核进程的创建" class="headerlink" title="第一个内核进程的创建"></a>第一个内核进程的创建</h2><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// 创建init的主线程</span></span><br><span class="line"><span class="keyword">int</span> pid = kernel_thread(init_main, <span class="string">"Hello world!!"</span>, <span class="number">0</span>);</span><br><span class="line"><span class="keyword">if</span> (pid <= <span class="number">0</span>) {</span><br><span class="line"> panic(<span class="string">"create init_main failed.\n"</span>);</span><br><span class="line">}</span><br><span class="line"><span class="comment">// 通过pid 查找proc_struct</span></span><br><span class="line">initproc = find_proc(pid);</span><br><span class="line">set_proc_name(initproc, <span class="string">"init"</span>);</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">int</span></span><br><span class="line">kernel_thread(<span class="keyword">int</span> (*fn)(<span class="keyword">void</span> *), <span class="keyword">void</span> *arg, <span class="keyword">uint32_t</span> clone_flags) {</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">trapframe</span> <span class="title">tf</span>;</span></span><br><span class="line"> <span class="comment">//在当前栈上临时分配一个tf,随后这个值会被存到新建的内核栈里</span></span><br><span class="line"> <span class="built_in">memset</span>(&tf, <span class="number">0</span>, <span class="keyword">sizeof</span>(struct trapframe));</span><br><span class="line"> tf.tf_cs = KERNEL_CS;</span><br><span class="line"> tf.tf_ds = tf.tf_es = tf.tf_ss = KERNEL_DS;</span><br><span class="line"> <span class="comment">// ebx = fn</span></span><br><span class="line"> tf.tf_regs.reg_ebx = (<span class="keyword">uint32_t</span>)fn;</span><br><span class="line"> <span class="comment">// edx = arg</span></span><br><span class="line"> tf.tf_regs.reg_edx = (<span class="keyword">uint32_t</span>)arg;</span><br><span class="line"> <span class="comment">// eip = kernel_thread_entry</span></span><br><span class="line"> tf.tf_eip = (<span class="keyword">uint32_t</span>)kernel_thread_entry;</span><br><span class="line"> <span class="keyword">return</span> do_fork(clone_flags | CLONE_VM, <span class="number">0</span>, &tf);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">int</span></span><br><span class="line">do_fork(<span class="keyword">uint32_t</span> clone_flags, <span class="keyword">uintptr_t</span> <span class="built_in">stack</span>, struct trapframe *tf) {</span><br><span class="line"> <span class="keyword">int</span> ret = -E_NO_FREE_PROC;</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">proc_struct</span> *<span class="title">proc</span>;</span></span><br><span class="line"> <span class="keyword">if</span> (nr_process >= MAX_PROCESS)</span><br><span class="line"> <span class="keyword">goto</span> fork_out;</span><br><span class="line"> ret = -E_NO_MEM;</span><br><span class="line"></span><br><span class="line"> <span class="comment">// 首先分配一个PCB</span></span><br><span class="line"> <span class="keyword">if</span> ((proc = alloc_proc()) == <span class="literal">NULL</span>)</span><br><span class="line"> <span class="keyword">goto</span> fork_out;</span><br><span class="line"> <span class="comment">// fork肯定存在父进程,所以设置子进程的父进程</span></span><br><span class="line"> proc->parent = current;</span><br><span class="line"> <span class="comment">// 分配内核栈(2页大小)</span></span><br><span class="line"> <span class="keyword">if</span> (setup_kstack(proc) != <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">goto</span> bad_fork_cleanup_proc;</span><br><span class="line"> <span class="comment">// 将所有虚拟页数据复制过去</span></span><br><span class="line"> <span class="keyword">if</span> (copy_mm(clone_flags, proc) != <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">goto</span> bad_fork_cleanup_kstack;</span><br><span class="line"> <span class="comment">// 复制线程的状态,包括寄存器上下文等等</span></span><br><span class="line"> copy_thread(proc, <span class="built_in">stack</span>, tf);</span><br><span class="line"> <span class="comment">// 将子进程的PCB添加进hash list或者list</span></span><br><span class="line"> <span class="comment">// 需要注意的是,不能让中断处理程序打断这一步操作</span></span><br><span class="line"> <span class="keyword">bool</span> intr_flag;</span><br><span class="line"> local_intr_save(intr_flag);</span><br><span class="line"> {</span><br><span class="line"> proc->pid = get_pid();</span><br><span class="line"> hash_proc(proc);</span><br><span class="line"> list_add(&proc_list, &(proc->list_link));</span><br><span class="line"> nr_process ++;</span><br><span class="line"> }</span><br><span class="line"> local_intr_restore(intr_flag);</span><br><span class="line"> <span class="comment">// 设置新的子进程可执行</span></span><br><span class="line"> wakeup_proc(proc);</span><br><span class="line"> <span class="comment">// 返回子进程的pid</span></span><br><span class="line"> ret = proc->pid;</span><br><span class="line"></span><br><span class="line">fork_out:</span><br><span class="line"> <span class="keyword">return</span> ret;</span><br><span class="line">bad_fork_cleanup_kstack:</span><br><span class="line"> put_kstack(proc);</span><br><span class="line">bad_fork_cleanup_proc:</span><br><span class="line"> kfree(proc);</span><br><span class="line"> <span class="keyword">goto</span> fork_out;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">copy_thread(struct proc_struct *proc, <span class="keyword">uintptr_t</span> esp, struct trapframe *tf)</span><br><span class="line">{</span><br><span class="line"> proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - <span class="number">1</span>;</span><br><span class="line"> *(proc->tf) = *tf; <span class="comment">// 结构体整体赋值</span></span><br><span class="line"> proc->tf->tf_regs.reg_eax = <span class="number">0</span>;</span><br><span class="line"> proc->tf->tf_esp = esp;</span><br><span class="line"> proc->tf->tf_eflags |= FL_IF;</span><br><span class="line"></span><br><span class="line"> proc->context.eip = (<span class="keyword">uintptr_t</span>)forkret;</span><br><span class="line"> proc->context.esp = (<span class="keyword">uintptr_t</span>)(proc->tf);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>注意,debug 可以发现:</p>
<p><code>(proc->kstack + KSTACKSIZE) - 1)</code> is 0xc0333fff<br><code>(struct trapframe *)(proc->kstack + KSTACKSIZE) - 1)</code> is 0xc0333fb4</p>
<p>两个地址的差刚好为一个 <code>sizeof(struct trapframe)</code> ,这表明指针强制类型转换将存储 tf 的指针往栈底抬了一个<code>sizeof(struct trapframe)</code>的大小,所以该进程对应的中断帧放在栈底是没有问题的,并没有越界。</p>
<p>经历完这一系列的初始化后,在<code>schedule</code>函数里会选中该进程去 run,调用<code>proc_run</code>中的<code>switch_to</code>,保存当前的寄存器内容到 idleproc 中的 context 中,又换 initmain 对应 context 中的 eip 来执行,即执行<code>forkret</code>函数(copy_thread 里设置的)</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">void</span></span><br><span class="line">forkret(<span class="keyword">void</span>)</span><br><span class="line">{</span><br><span class="line"> forkrets(current->tf);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line">.globl forkrets</span><br><span class="line">forkrets:</span><br><span class="line"> # set stack to this new process's trapframe</span><br><span class="line"> movl <span class="number">4</span>(%esp), %esp</span><br><span class="line"> jmp __trapret</span><br><span class="line"></span><br><span class="line">.globl __trapret</span><br><span class="line">__trapret:</span><br><span class="line"> # 从中断帧中恢复所有的寄存器值</span><br><span class="line"> popal</span><br><span class="line"></span><br><span class="line"> <span class="meta"># restore %ds, %es, %fs and %gs</span></span><br><span class="line"> popl %gs</span><br><span class="line"> popl %fs</span><br><span class="line"> popl %es</span><br><span class="line"> popl %ds</span><br><span class="line"></span><br><span class="line"> <span class="meta"># get rid of the trap number and <span class="meta-keyword">error</span> code</span></span><br><span class="line"> addl $<span class="number">0x8</span>, %esp</span><br><span class="line"> iret</span><br></pre></td></tr></table></figure>
<p>这句 iret 应该会把 tf->tf_eip 当作返回地址弹出跳转到执行 kernel_thread_entry。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">.text.</span><br><span class="line">.globl kernel_thread_entry</span><br><span class="line">kernel_thread_entry: <span class="meta"># void kernel_thread(void)</span></span><br><span class="line"></span><br><span class="line"> pushl %edx <span class="meta"># push arg</span></span><br><span class="line"> call *%ebx <span class="meta"># call fn</span></span><br><span class="line"></span><br><span class="line"> pushl %eax <span class="meta"># save the return value of fn(arg)</span></span><br><span class="line"> call do_exit <span class="meta"># call do_exit to terminate current thread</span></span><br></pre></td></tr></table></figure>
<p>ebx 存了要执行的函数地址 edx 存了函数参数,调用完对应的函数后取返回值 eax,最后执行 do_exit 释放进程占用资源并退出。</p>
<h2 id="tf-and-context"><a href="#tf-and-context" class="headerlink" title="tf and context"></a>tf and context</h2><p>struct context context:储存进程当前状态,用于进程切换中上下文的保存与恢复。</p>
<p>需要注意的是,与 trapframe 所保存的用户态上下文不同,context 保存的是线程的当前上下文。这个上下文可能是执行用户代码时的上下文,也可能是执行内核代码时的上下文。</p>
<p><code>struct trapframe* tf</code>:无论是用户程序在用户态通过系统调用进入内核态,还是线程在内核态中被创建,内核态中的线程返回用户态所加载的上下文就是<code>struct trapframe* tf</code>。 所以当一个线程在内核态中建立,则该新线程就必须伪造一个 trapframe 来返回用户态。</p>
<p>思考一下,从用户态进入内核态会压入当时的用户态上下文 trapframe。</p>
<p>两者关系:以 kernel_thread 函数为例,尽管该函数设置了 proc->trapframe,但在 fork 函数中的 copy_thread 函数里,程序还会设置 proc->context。两个上下文看上去好像冗余,但实际上两者所分的工是不一样的。</p>
<p>进程之间通过进程调度来切换控制权,当某个 fork 出的新进程获取到了控制流后,首当其中执行的代码是 current->context->eip 所指向的代码,此时新进程仍处于内核态,但实际上我们想在用户态中执行代码,所以我们需要从内核态切换回用户态,也就是中断返回。此时会遇上两个问题:</p>
<p>新进程如何执行中断返回? 这就是 proc->context.eip = (uintptr_t)forkret 的用处。forkret 会使新进程正确的从中断处理例程中返回。</p>
<p>新进程中断返回至用户代码时的上下文为? 这就是 proc_struct->tf 的用处。中断返回时,新进程会恢复保存的 trapframe 信息至各个寄存器中,然后开始执行用户代码。</p>
<h2 id="local-intr-save-and-local-intr-restore"><a href="#local-intr-save-and-local-intr-restore" class="headerlink" title="local_intr_save and local_intr_restore"></a>local_intr_save and local_intr_restore</h2><p>语句 local_intr_save(intr_flag);….local_intr_restore(intr_flag);在这里有何作用?请说明理由。</p>
<p>这两句代码的作用分别是阻塞中断和解除中断的阻塞。<br>这两句的配合,使得这两句代码之间的代码块形成原子操作,可以使得某些关键的代码不会被打断,从而避免引起一些未预料到的错误,避免条件竞争。<br>以进程切换为例,在 proc_run 中,当刚设置好 current 指针为下一个进程,但还未完全将控制权转移时,如果该过程突然被一个中断所打断,则中断处理例程的执行可能会引发异常,因为 current 指针指向的进程与实际使用的进程资源不一致。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab1/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab1/" class="post-title-link" itemprop="url">Lab1 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-01-24 10:47:34" itemprop="dateCreated datePublished" datetime="2021-01-24T10:47:34Z">2021-01-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-23 00:26:39" itemprop="dateModified" datetime="2021-04-23T00:26:39Z">2021-04-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/OS/" itemprop="url" rel="index"><span itemprop="name">OS</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="Lab1-实验报告"><a href="#Lab1-实验报告" class="headerlink" title="Lab1 实验报告"></a>Lab1 实验报告</h1><p><code>/labcodes_answer/lab1_result/</code> 目录下的代码,在用较新版本(好像是 GCC 5.x 开始)就会出现生成的 <code>bootloader</code> 二进制文件过大无法塞入第一个扇区的问题,这种情况只需要将 <code>/labcodes_answer/lab1_result/boot/bootmain.c</code>中的全局变量改为用宏定义即可编译通过。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">unsigned</span> <span class="keyword">int</span> SECTSIZE = <span class="number">512</span> ;</span><br><span class="line">改为</span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> SECTSIZE 512</span></span><br></pre></td></tr></table></figure>
<h2 id="make-生成执行文件的过程"><a href="#make-生成执行文件的过程" class="headerlink" title="make 生成执行文件的过程"></a>make 生成执行文件的过程</h2><h3 id="操作系统镜像文件-ucore-img-是如何一步一步生成的"><a href="#操作系统镜像文件-ucore-img-是如何一步一步生成的" class="headerlink" title="操作系统镜像文件 ucore.img 是如何一步一步生成的"></a>操作系统镜像文件 ucore.img 是如何一步一步生成的</h3><p>Makefile 的终极目标在第 207 行被显式指定为 205 行的 <code>TARGETS</code> ,而 <code>TARGETS</code> 的依赖为 <code>$(TARGETS)</code> ,这个变量在 Makefile 只是空的,但是会在 <code>tools/function.mk</code> 中的 <code>do_create_target</code> 宏中被修改, <code>do_create_target</code> 被函数 <code>create_target</code> 直接调用。因此在 Makefile 中只要调用了 <code>create_target</code> 就会为 <code>$(TARGETS)</code> 增添新的一项。</p>
<p>经过一系列的 <code>create_target</code> , <code>$(TARGETS)</code> 最终值为 <code>bin/kernel bin/bootblock bin/sign bin/ucore.img</code></p>
<p>首先看<code>bin/kernel</code>的文件依赖</p>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="variable">$(kernel)</span>: <span class="variable">$(KOBJS)</span> tools/kernel.ld</span><br><span class="line"> @echo + ld <span class="variable">$@</span></span><br><span class="line"> <span class="variable">$(V)</span><span class="variable">$(LD)</span> <span class="variable">$(LDFLAGS)</span> -T tools/kernel.ld -o <span class="variable">$@</span> <span class="variable">$(KOBJS)</span></span><br><span class="line"> <span class="comment">#最终的内核文件应该去除符号表等信息,并输出符号表信息,汇编文件信息,和输出信息</span></span><br><span class="line"> @<span class="variable">$(OBJDUMP)</span> -S <span class="variable">$@</span> > <span class="variable">$(<span class="built_in">call</span> asmfile,kernel)</span></span><br><span class="line"> @<span class="variable">$(OBJDUMP)</span> -t <span class="variable">$@</span> | <span class="variable">$(SED)</span> '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > <span class="variable">$(<span class="built_in">call</span> symfile,kernel)</span></span><br></pre></td></tr></table></figure>
<p>显示这段代码执行的输出可以看到</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">+ ld bin/kernel</span><br><span class="line">ld -m elf_i386 -nostdlib -T tools/kernel.ld -o bin/kernel obj/kern/init/init.o obj/kern/libs/readline.o obj/kern/libs/stdio.o obj/kern/debug/kdebug.o obj/kern/debug/kmonitor.o obj/kern/debug/panic.o obj/kern/driver/clock.o obj/kern/driver/console.o obj/kern/driver/intr.o obj/kern/driver/picirq.o obj/kern/trap/trap.o obj/kern/trap/trapentry.o obj/kern/trap/vectors.o obj/kern/mm/pmm.o obj/libs/printfmt.o obj/libs/string.o</span><br></pre></td></tr></table></figure>
<p>ld 命令参数:</p>
<ul>
<li>m <emulation> 模拟为 i386 上的连接器</li>
<li>nostdlib 不要在标准系统目录中寻找头文件.只搜索`-I’选项指定的目录(以及当前目录,如果合适).</li>
<li>T <scriptfile> 让连接器使用指定的脚本</li>
</ul>
<p>所谓的<code>KOBJS</code>就是那串跟在<code>-o</code> 后面的,在<code>\lib</code>,<code>\kern</code>文件夹中所有.c .S 文件生成.o 二进制文件。通过<code>ld</code>的链接指令完成了 <code>bin/kernel</code> 文件的生成。 至于下面的的使用<code>@</code>隐藏输出的<code>OBJDUMP</code>,应该是删符号表等信息。</p>
<p>至于在<code>ld</code>命令前的.o 文件生成指令</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">+ cc kern/init/init.c</span><br><span class="line">gcc -Ikern/init/ -fno-builtin -Wall -ggdb -m32 -gstabs -nostdinc -fno-stack-protector -Ilibs/ -Ikern/debug/ -Ikern/driver/ -Ikern/trap/ -Ikern/mm/ -c kern/init/init.c -o obj/kern/init/init.o</span><br></pre></td></tr></table></figure>
<p>gcc 参数:</p>
<ul>
<li>-fno-builtin 除非用 __builtin__ 前缀,否则不进行 builtin 函数的优化</li>
<li>-Wall 选项意思是编译后显示所有警告。</li>
<li>-ggdb 生成可供 gdb 使用的调试信息。这样才能用 qemu+gdb 来调试 bootloader or ucore。</li>
<li>-m32 生成适用于 32 位环境的代码。我们用的模拟硬件是 32bit 的 80386,所以 ucore 也要是 32 位的软件。</li>
<li>-gstabs 生成 stabs 格式的调试信息。这样要 ucore 的 monitor 可以显示出便于开发者阅读的函数调用栈信息</li>
<li>-nostdinc 不使用标准库。标准库是给应用程序用的,我们是编译 ucore 内核,OS 内核是提供服务的,所以所有的服务要自给自足。</li>
<li>-fno-stack-protector 不生成用于检测缓冲区溢出的代码。这是 for 应用程序的,我们是编译内核,ucore 内核好像还用不到此功能。</li>
<li>-I<dir> 添加搜索头文件的路径</li>
<li>-c Compile and assemble, but do not link.</li>
</ul>
<p>是在第 126 行左右执行的<code>$(call add_files_cc,$(call listf_cc,$(KSRCDIR)),kernel,$(KCFLAGS))</code></p>
<p>该命令对所有 kern 和 lib 下的 .c .S 文件执行了编译操作生成.o 文件。</p>
<p>紧接上面,之后的输出</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">+ cc boot/bootasm.S</span><br><span class="line">gcc -Iboot/ -fno-builtin -Wall -ggdb -m32 -gstabs -nostdinc -fno-stack-protector -Ilibs/ -Os -nostdinc -c boot/bootasm.S -o obj/boot/bootasm.o</span><br><span class="line">+ cc boot/bootmain.c</span><br><span class="line">gcc -Iboot/ -fno-builtin -Wall -ggdb -m32 -gstabs -nostdinc -fno-stack-protector -Ilibs/ -Os -nostdinc -c boot/bootmain.c -o obj/boot/bootmain.o</span><br></pre></td></tr></table></figure>
<p>gcc 参数:</p>
<ul>
<li>-Os 为减小代码大小而进行优化。根据硬件 spec,主引导扇区只有 512 字节,我们写的简单 bootloader 的最终大小不能大于 510 字节。</li>
</ul>
<p>来自于下面两条指令</p>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># create bootblock</span></span><br><span class="line">bootfiles = <span class="variable">$(<span class="built_in">call</span> listf_cc,boot)</span></span><br><span class="line"><span class="variable">$(<span class="built_in">foreach</span> f,<span class="variable">$(bootfiles)</span>,$(<span class="built_in">call</span> cc_compile,<span class="variable">$(f)</span>,<span class="variable">$(CC)</span>,<span class="variable">$(CFLAGS)</span> -Os -nostdinc)</span>)</span><br></pre></td></tr></table></figure>
<p>第一条语句找出<code>\boot</code>文件夹下的所有以.S .c 结尾的文件<code>bootasm.S</code>,<code>bootmain.c</code>,第二句话对上面找出的两个文件执行编译。 其中<code>bootasm.S</code>依赖于<code>\boot</code>文件夹下的的<code>asm.h</code>头文件。引入该头文件的方法是在 gcc 编译指令中加上<code>-Iboot/</code>这个参数。<code>-I<dir> 添加搜索头文件的路径</code>,根据这个参数可以找到位于<code>\boot</code>文件夹下的的<code>asm.h</code>头文件</p>
<p>下面的输出是对 sign 工具的编译。虽然在 makefile 文件下紧接着的是对 bootblock 的编译,但是因为 bootblock 依赖于 sign 工具,故首先执行 sign 工具的编译操作。</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">+ cc tools/sign.c</span><br><span class="line">gcc -Itools/ -g -Wall -O2 -c tools/sign.c -o obj/sign/tools/sign.o</span><br><span class="line">gcc -g -Wall -O2 obj/sign/tools/sign.o -o bin/sign</span><br></pre></td></tr></table></figure>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># create 'sign' tools</span></span><br><span class="line"><span class="variable">$(<span class="built_in">call</span> add_files_host,tools/sign.c,sign,sign)</span></span><br><span class="line"><span class="variable">$(<span class="built_in">call</span> create_target_host,sign,sign)</span></span><br></pre></td></tr></table></figure>
<p>该生成由以上两句语句生成并输出</p>
<p>解决了 sign 工具的依赖后开始生成 bootblock,输出如下:</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">+ ld bin/bootblock</span><br><span class="line">ld -m elf_i386 -nostdlib -N -e start -Ttext 0x7C00 obj/boot/bootasm.o obj/boot/bootmain.o -o obj/bootblock.o</span><br><span class="line">'obj/bootblock.out' size: 488 bytes</span><br><span class="line">build 512 bytes boot sector: 'bin/bootblock' success!</span><br></pre></td></tr></table></figure>
<p>新出现的 ld 命令参数:</p>
<ul>
<li>e <entry> 指定入口</li>
<li>N 设置代码段和数据段均可读写</li>
<li>Ttext 制定代码段开始位置,0x7C00 就是 bios 执行完后程序开始执行的位置</li>
</ul>
<p>对应 makefile</p>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="variable">$(bootblock)</span>: <span class="variable">$(<span class="built_in">call</span> toobj,<span class="variable">$(bootfiles)</span>)</span> | <span class="variable">$(<span class="built_in">call</span> totarget,sign)</span></span><br><span class="line"> @echo + ld <span class="variable">$@</span></span><br><span class="line"> <span class="variable">$(V)</span><span class="variable">$(LD)</span> <span class="variable">$(LDFLAGS)</span> -N -e start -Ttext 0x7C00 <span class="variable">$^</span> -o <span class="variable">$(<span class="built_in">call</span> toobj,bootblock)</span></span><br><span class="line"> @<span class="variable">$(OBJDUMP)</span> -S <span class="variable">$(<span class="built_in">call</span> objfile,bootblock)</span> > <span class="variable">$(<span class="built_in">call</span> asmfile,bootblock)</span></span><br><span class="line"> @<span class="variable">$(OBJDUMP)</span> -t <span class="variable">$(<span class="built_in">call</span> objfile,bootblock)</span> | <span class="variable">$(SED)</span> '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > <span class="variable">$(<span class="built_in">call</span> symfile,bootblock)</span></span><br><span class="line"> @<span class="variable">$(OBJCOPY)</span> -S -O binary <span class="variable">$(<span class="built_in">call</span> objfile,bootblock)</span> <span class="variable">$(<span class="built_in">call</span> outfile,bootblock)</span></span><br><span class="line"> @<span class="variable">$(<span class="built_in">call</span> totarget,sign)</span> <span class="variable">$(<span class="built_in">call</span> outfile,bootblock)</span> <span class="variable">$(bootblock)</span></span><br></pre></td></tr></table></figure>
<p>objcopy 拷贝二进制代码 bootblock.o 到 bootblock.out:</p>
<ul>
<li>-S 移除所有符号和重定位信息</li>
<li>-O <bfdname> 指定输出格式</li>
</ul>
<p>调用 sign 进行签名(这里只显示了 sign 程序执行时的输出,并没有显示调用 sign 程序的指令):</p>
<p><code>sign bootblock.out bootblock</code>(大概是这个意思)</p>
<p>sign 函数执行的逻辑就是在文件小于 510 个字节的情况下将最后两个字节置为 0x55AA 标志该启动扇区是合法的。</p>
<p>执行以上代码后完成 bootblock 的构造,最后执行 ucore.img 的构造</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line">dd if=/dev/zero of=bin/ucore.img count=10000</span><br><span class="line"></span><br><span class="line">10000+0 records in</span><br><span class="line">10000+0 records out</span><br><span class="line">5120000 bytes (5.1 MB) copied, 0.0213187 s, 240 MB/s</span><br><span class="line"></span><br><span class="line">dd if=bin/bootblock of=bin/ucore.img conv=notrunc</span><br><span class="line"></span><br><span class="line">1+0 records in</span><br><span class="line">1+0 records out</span><br><span class="line">512 bytes (512 B) copied, 8.997e-05 s, 5.7 MB/s</span><br><span class="line"></span><br><span class="line">dd if=bin/kernel of=bin/ucore.img seek=1 conv=notrunc</span><br><span class="line"></span><br><span class="line">146+1 records in</span><br><span class="line">146+1 records out</span><br><span class="line">74923 bytes (75 kB) copied, 0.000318176 s, 235 MB/s</span><br></pre></td></tr></table></figure>
<figure class="highlight makefile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">UCOREIMG := <span class="variable">$(<span class="built_in">call</span> totarget,ucore.img)</span></span><br><span class="line"></span><br><span class="line"><span class="variable">$(UCOREIMG)</span>: <span class="variable">$(kernel)</span> <span class="variable">$(bootblock)</span></span><br><span class="line"> <span class="variable">$(V)</span>dd if=/dev/zero of=<span class="variable">$@</span> count=10000</span><br><span class="line"> <span class="variable">$(V)</span>dd if=<span class="variable">$(bootblock)</span> of=<span class="variable">$@</span> conv=notrunc</span><br><span class="line"> <span class="variable">$(V)</span>dd if=<span class="variable">$(kernel)</span> of=<span class="variable">$@</span> seek=1 conv=notrunc</span><br></pre></td></tr></table></figure>
<ol>
<li>生成一个有 10000 个块的文件,每个块默认 512 字节,用 0 填充<br>dd if=/dev/zero of=bin/ucore.img count=10000</li>
<li>把 bootblock 中的内容写到第一个块<br>dd if=bin/bootblock of=bin/ucore.img conv=notrunc</li>
<li>从第二个块开始写 kernel 中的内容<br>dd if=bin/kernel of=bin/ucore.img seek=1 conv=notrunc</li>
</ol>
<blockquote>
<p>/dev/zero : 在类 UNIX 操作系统中, /dev/zero 是一个特殊的文件,当你读它的时候,它会提供无限的空字符(NULL, ASCII NUL, 0x00)。其中的一个典型用法是用它提供的字符流来覆盖信息,另一个常见用法是产生一个特定大小的空白文件。BSD 就是通过 mmap 把/dev/zero 映射到虚地址空间实现共享内存的。可以使用 mmap 将/dev/zero 映射到一个虚拟的内存空间,这个操作的效果等同于使用一段匿名的内存(没有和任何文件相关)。</p>
</blockquote>
<blockquote>
<p>if=文件名:输入文件名,默认为标准输入。即指定源文件。<br>of=文件名:输出文件名,默认为标准输出。即指定目的文件。<br>count=blocks:仅拷贝 blocks 个块,块大小等于 ibs 指定的字节数。 notrunc:不截短输出文件</p>
</blockquote>
<h3 id="一个被系统认为是符合规范的硬盘主引导扇区的特征是什么"><a href="#一个被系统认为是符合规范的硬盘主引导扇区的特征是什么" class="headerlink" title="一个被系统认为是符合规范的硬盘主引导扇区的特征是什么"></a>一个被系统认为是符合规范的硬盘主引导扇区的特征是什么</h3><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><stdio.h></span></span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><errno.h></span></span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><string.h></span></span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><sys/stat.h></span></span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">main</span><span class="params">(<span class="keyword">int</span> argc, <span class="keyword">char</span> *argv[])</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="class"><span class="keyword">struct</span> <span class="title">stat</span> <span class="title">st</span>;</span></span><br><span class="line"> <span class="keyword">if</span> (argc != <span class="number">3</span>)</span><br><span class="line"> {</span><br><span class="line"> <span class="built_in">fprintf</span>(<span class="built_in">stderr</span>, <span class="string">"Usage: <input filename> <output filename>\n"</span>);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 获取输入文件的状态</span></span><br><span class="line"> <span class="keyword">if</span> (stat(argv[<span class="number">1</span>], &st) != <span class="number">0</span>) <span class="comment">//不存在报错</span></span><br><span class="line"> {</span><br><span class="line"> <span class="built_in">fprintf</span>(<span class="built_in">stderr</span>, <span class="string">"Error opening file '%s': %s\n"</span>, argv[<span class="number">1</span>], strerror(errno));</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"'%s' size: %lld bytes\n"</span>, argv[<span class="number">1</span>], (<span class="keyword">long</span> <span class="keyword">long</span>)st.st_size);</span><br><span class="line"> <span class="keyword">if</span> (st.st_size > <span class="number">510</span>) <span class="comment">//文件大于510个字节报错,虽然启动扇区能够装512个字节,但最后两个字节必须是0x55AA来标志该扇区为合法的启动扇区,所以最多装510个</span></span><br><span class="line"> {</span><br><span class="line"> <span class="built_in">fprintf</span>(<span class="built_in">stderr</span>, <span class="string">"%lld >> 510!!\n"</span>, (<span class="keyword">long</span> <span class="keyword">long</span>)st.st_size);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">char</span> buf[<span class="number">512</span>];</span><br><span class="line"> <span class="built_in">memset</span>(buf, <span class="number">0</span>, <span class="keyword">sizeof</span>(buf));</span><br><span class="line"> FILE *ifp = fopen(argv[<span class="number">1</span>], <span class="string">"rb"</span>); <span class="comment">//argv[0]是程序全名,argv[1]是输入文件,即以读二进制文件的方法打开输入文件</span></span><br><span class="line"> <span class="keyword">int</span> size = fread(buf, <span class="number">1</span>, st.st_size, ifp); <span class="comment">//从ifp读st.st_size个对象,每个对象读一次到buf中</span></span><br><span class="line"> <span class="keyword">if</span> (size != st.st_size)</span><br><span class="line"> {</span><br><span class="line"> <span class="built_in">fprintf</span>(<span class="built_in">stderr</span>, <span class="string">"read '%s' error, size is %d.\n"</span>, argv[<span class="number">1</span>], size);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> fclose(ifp);</span><br><span class="line"> buf[<span class="number">510</span>] = <span class="number">0x55</span>; <span class="comment">//把最后两个字节写成0x55AA</span></span><br><span class="line"> buf[<span class="number">511</span>] = <span class="number">0xAA</span>;</span><br><span class="line"> FILE *ofp = fopen(argv[<span class="number">2</span>], <span class="string">"wb+"</span>);</span><br><span class="line"> size = fwrite(buf, <span class="number">1</span>, <span class="number">512</span>, ofp); <span class="comment">//写回到输出</span></span><br><span class="line"> <span class="keyword">if</span> (size != <span class="number">512</span>)</span><br><span class="line"> {</span><br><span class="line"> <span class="built_in">fprintf</span>(<span class="built_in">stderr</span>, <span class="string">"write '%s' error, size is %d.\n"</span>, argv[<span class="number">2</span>], size);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> fclose(ofp);</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"build 512 bytes boot sector: '%s' success!\n"</span>, argv[<span class="number">2</span>]);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>符合规范的硬盘主引导扇区 size=512bytes,并且第 511 个 byte 值为 0x55,第 512 个 byte 的值为 0xAA</p>
<h2 id="操作系统启动过程"><a href="#操作系统启动过程" class="headerlink" title="操作系统启动过程"></a>操作系统启动过程</h2><ol>
<li>x86 PC 刚开机时 CPU 处于实模式</li>
<li>开机时,CS=0xFFFF; IP=0x0000</li>
<li>寻址 0xFFFF0(ROM BIOS 映射区)</li>
<li>检查 RAM,键盘,显示器,软硬磁盘</li>
<li>将磁盘 0 磁道 0 扇区读入 0x7c00 处</li>
<li>设置 cs=0x07c0,ip=0x0000</li>
</ol>
<h2 id="分析-bootloader-进入保护模式的过程"><a href="#分析-bootloader-进入保护模式的过程" class="headerlink" title="分析 bootloader 进入保护模式的过程"></a>分析 bootloader 进入保护模式的过程</h2><h3 id="为何开启-A20,以及如何开启-A20"><a href="#为何开启-A20,以及如何开启-A20" class="headerlink" title="为何开启 A20,以及如何开启 A20"></a>为何开启 A20,以及如何开启 A20</h3><p>在 i8086 时代,CPU 的数据总线是 16bit,地址总线是 20bit,寄存器是 16bit,因此 CPU 只能访问 1MB 以内的空间。因为数据总线和寄存器只有 16bit,如果需要获取 20bit 的数据, 需要 segment(每个 segment 大小恒定为 64K)左移 4 位再加上 offset 组成一个 20bit 的地址。理论上,20bit 的地址可以访问 1MB 的内存空间(0x00000 - (2^20 - 1 = 0xFFFFF))。但在实模式下, 这 20bit 的地址理论上能访问从 0x00000 - (0xFFFF0 + 0xFFFF = 0x10FFEF)的内存空间。也就是说,理论上我们可以访问超过 1MB 的内存空间,但越过 0xFFFFF 后,地址又会回到 0x00000。上面这个特征在 i8086 中是没有任何问题的(因为它最多只能访问 1MB 的内存空间),但到了 i80286/i80386 后,CPU 有了更宽的地址总线,数据总线和寄存器后,这就会出现一个问题: 在实模式下, 我们可以访问超过 1MB 的空间,但我们只希望访问 1MB 以内的内存空间。为了解决这个问题, CPU 中添加了一个可控制 A20 地址线的模块,通过这个模块,我们在实模式下将第 20bit 的地址线限制为 0,这样 CPU 就不能访问超过 1MB 的空间了。进入保护模式后,我们再通过这个模块解除对 A20 地址线的限制,这样我们就能访问超过 1MB 的内存空间了。</p>
<p>现在使用的 CPU 都是通过键盘控制器 8042 (端口 0x64 和 0x60 连着键盘控制器) 来控制 A20 地址线。默认情况下,A20 地址线是关闭的(限制只能访问 1M 内存),因此在进入保护模式(需要访问超过 1MB 的内存空间)前,我们需要开启 A20 地址线(第 20bit 的地址线可为 0 或者 1)。开启代码在 bootasm.S 文件中。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line">seta20.1:</span><br><span class="line"> inb $0x64, %al # Wait for not busy(8042 input buffer empty).</span><br><span class="line"> testb $0x2, %al</span><br><span class="line"> jnz seta20.1</span><br><span class="line"></span><br><span class="line"> movb $0xd1, %al # 0xd1 -> port 0x64</span><br><span class="line"> outb %al, $0x64 # 0xd1 means: write data to 8042's P2 port</span><br><span class="line"></span><br><span class="line">seta20.2:</span><br><span class="line"> inb $0x64, %al # Wait for not busy(8042 input buffer empty).</span><br><span class="line"> testb $0x2, %al</span><br><span class="line"> jnz seta20.2</span><br><span class="line"></span><br><span class="line"> movb $0xdf, %al # 0xdf -> port 0x60</span><br><span class="line"> outb %al, $0x60 # 0xdf = 11011111, means set P2's A20 bit(the 1 bit) to 1</span><br></pre></td></tr></table></figure>
<p>cpu 可以直接读写以下三个地方的数据,读写三个地方的指令都是不同的,他们的空间也是分开的。</p>
<ol>
<li>端口</li>
<li>内存</li>
<li>寄存器</li>
</ol>
<p>对外设的控制都是通过读写对应外设的端口来完成的。对端口的读写汇编指令只有 in 和 out。</p>
<p>由于当时的 8042 键盘控制器上恰好有空闲的端口引脚(输出端口 P2,引脚 P21),于是便使用了该引脚来作为与门控制这个地址比特位。该信号即被称为 A20。如果它为零,则比特 20 及以上地址都被清除。从而实现了兼容性。</p>
<p>当 A20 地址线控制禁止时,程序就像运行在 8086 上,1MB 以上的地址是不可访问的,只能访问奇数 MB 的不连续的地址。为了使能所有地址位的寻址能力,必须向键盘控制器 8082 发送一个命令,键盘控制器 8042 会将 A20 线置于高电位,使全部 32 条地址线可用,实现访问 4GB 内存。</p>
<p>控制 A20 gate 的方法有 3 种:</p>
<ol>
<li>804x 键盘控制器法</li>
<li>Fast A20 法</li>
<li>BIOS 中断法</li>
</ol>
<p>ucore 实验中用了第一种 804x 键盘控制器法,这也是最古老且效率最慢的一种。由于在机器启动时,默认条件下,A20 地址线是禁止的,所以操作系统必须使用适当的方法来开启它。</p>
<p>等待 8042 Input buffer 为空;<br>发送 Write 8042 Output Port (P2)命令到 8042 Input buffer;<br>等待 8042 Input buffer 为空;<br>将 8042 Output Port(P2)得到字节的第 2 位置 1,然后写入 8042 Input buffer</p>
<h3 id="如何初始化-GDT-表"><a href="#如何初始化-GDT-表" class="headerlink" title="如何初始化 GDT 表"></a>如何初始化 GDT 表</h3><p>在保护模式下,x86 CPU 通过 GDT 表访问内存,我们根据 CPU 给的逻辑地址分离出段选择子。利用这个段选择子选择一个段描述符。将段描述符里的 Base Address 和段选择子的偏移量相加而得到线性地址。这个地址就是我们需要的地址。</p>
<p><img src="https://segmentfault.com/img/remote/1460000009386096?w=640&h=300" alt="GDT"></p>
<p>在实模式下,通过 segment + offset 的方式一个程序可以访问内存中的任意一个地址,但是开启了保护模式之后,段选择子和段描述符中都有了特权级的概念,程序不能随意访问高特权级的段内容。段表有固定的格式被放到内存中,CPU 使用全局描述符表寄存器 GDTR 保存段表起始地址。GDTR 长 48 位,其中高 32 位为基地址,低 16 位为段界限。这里只需要载入已经静态存储在引导区的 GDT 表和其描述符到 GDTR 寄存器。理论上 GDT 可以存在内存中任何位置,但这里我们是在实模式下初始化 GDT 的,因此 GDT 应该是存在最低的这 1MB 内存空间中。CPU 通过 lgdt 指令读入 GDT 的地址,之后我们就可以使用 GDT 了。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br><span class="line">110</span><br><span class="line">111</span><br><span class="line">112</span><br><span class="line">113</span><br><span class="line">114</span><br><span class="line">115</span><br><span class="line">116</span><br></pre></td><td class="code"><pre><span class="line">#注释:#include <asm.h></span><br><span class="line">asm.h头文件中包含了一些宏定义,用于定义gdt,gdt是保护模式使用的全局段描述符表,其中存储着段描述符。</span><br><span class="line"># Start the CPU: switch to 32-bit protected mode, jump into C.</span><br><span class="line"># The BIOS loads this code from the first sector of the hard disk into</span><br><span class="line"># memory at physical address 0x7c00 and starts executing in real mode</span><br><span class="line"># with %cs=0 %ip=7c00.</span><br><span class="line">此段注释说明了要完成的目的:启动保护模式,转入C函数。</span><br><span class="line">这里正好说了一下bootasm.S文件的作用。计算机加电后,由BIOS将bootasm.S生成的可执行代码从硬盘的第一个扇区复制到内存中的物理地址0x7c00处,并开始执行。</span><br><span class="line">此时系统处于实模式。可用内存不多于1M。</span><br><span class="line"></span><br><span class="line">.set PROT_MODE_CSEG, 0x8 # kernel code segment selector</span><br><span class="line">.set PROT_MODE_DSEG, 0x10 # kernel data segment selector</span><br><span class="line">这两个段选择子的作用其实是提供了gdt中代码段和数据段的索引</span><br><span class="line">.set CR0_PE_ON, 0x1 # protected mode enable flag</span><br><span class="line">这个变量是开启A20地址线的标志,为1是开启保护模式</span><br><span class="line"></span><br><span class="line"># start address should be 0:7c00, in real mode, the beginning address of the running bootloader</span><br><span class="line">.globl start</span><br><span class="line">start:</span><br><span class="line">这两行代码相当于定义了C语言中的main函数,start就相当于main,BIOS调用程序时,从这里开始执行</span><br><span class="line">.code16 # Assemble for 16-bit mode</span><br><span class="line">因为以下代码是在实模式下执行,所以要告诉编译器使用16位模式编译。</span><br><span class="line"> cli # Disable interrupts</span><br><span class="line"> cld # String operations increment</span><br><span class="line">关中断,设置字符串操作是递增方向。cld的作用是将direct flag标志位清零,这意味着自动增加源索引和目标索引的指令(如MOVS)将同时增加它们。</span><br><span class="line"></span><br><span class="line"> # Set up the important data segment registers (DS, ES, SS).</span><br><span class="line"> xorw %ax, %ax # Segment number zero</span><br><span class="line">ax寄存器就是eax寄存器的低十六位,使用xorw清零ax,效果相当于movw $0, %ax。 但是好像xorw性能好一些,google了一下没有得到好答案</span><br><span class="line"> movw %ax, %ds # -> Data Segment</span><br><span class="line"> movw %ax, %es # -> Extra Segment</span><br><span class="line"> movw %ax, %ss # -> Stack Segment</span><br><span class="line">将段选择子清零</span><br><span class="line"> # Enable A20:</span><br><span class="line"> # For backwards compatibility with the earliest PCs, physical</span><br><span class="line"> # address line 20 is tied low, so that addresses higher than</span><br><span class="line"> # 1MB wrap around to zero by default. This code undoes this.</span><br><span class="line">准备工作就绪,下面开始动真格的了,激活A20地址位。先翻译注释:由于需要兼容早期pc,物理地址的第20位绑定为0,所以高于1MB的地址又回到了0x00000.</span><br><span class="line">好了,激活A20后,就可以访问所有4G内存了,就可以使用保护模式了。</span><br><span class="line"></span><br><span class="line">怎么激活呢,由于历史原因A20地址位由键盘控制器芯片8042管理。所以要给8042发命令激活A20</span><br><span class="line">8042有两个IO端口:0x60和0x64, 激活流程位: 发送0xd1命令到0x64端口 --> 发送0xdf到0x60,done!</span><br><span class="line"># seta20.1这些破东西叫标号。标号有唯一的名字加冒号组成。它可以出现在汇编程序的任何地方,并与紧跟其后的哪行代码具有相同的地址。概括的说 ,当程序中要跳转到另一位置时,需要有一个标识来指示新的位置,这就是标号,通过在目标地址的前面放上一个标号,可以在指令中使用标号来代替直接使用地址。</span><br><span class="line">seta20.1:</span><br><span class="line"> inb $0x64, %al # Wait for not busy(8042 input buffer empty).</span><br><span class="line"> testb $0x2, %al</span><br><span class="line"> jnz seta20.1</span><br><span class="line">#发送命令之前,要等待键盘输入缓冲区为空,这通过8042的状态寄存器的第2bit来观察,而状态寄存器的值可以读0x64端口得到。</span><br><span class="line">#上面的指令的意思就是,如果状态寄存器的第2位为1,就跳到seta20.1符号处执行,知道第2位为0,代表缓冲区为空</span><br><span class="line"></span><br><span class="line"> movb $0xd1, %al # 0xd1 -> port 0x64</span><br><span class="line"> outb %al, $0x64 # 0xd1 means: write data to 8042's P2 port</span><br><span class="line">发送0xd1到0x64端口</span><br><span class="line"></span><br><span class="line">seta20.2:</span><br><span class="line"> inb $0x64, %al # Wait for not busy(8042 input buffer empty).</span><br><span class="line"> testb $0x2, %al</span><br><span class="line"> jnz seta20.2</span><br><span class="line"></span><br><span class="line"> movb $0xdf, %al # 0xdf -> port 0x60</span><br><span class="line"> outb %al, $0x60 # 0xdf = 11011111, means set P2's A20 bit(the 1 bit) to 1</span><br><span class="line"></span><br><span class="line">到此,A20激活完成。</span><br><span class="line"> # Switch from real to protected mode, using a bootstrap GDT</span><br><span class="line"> # and segment translation that makes virtual addresses</span><br><span class="line"> # identical to physical addresses, so that the</span><br><span class="line"> # effective memory map does not change during the switch.</span><br><span class="line">转入保护模式,这里需要指定一个临时的GDT,来翻译逻辑地址。这里使用的GDT通过gdtdesc段定义。它翻译得到的物理地址和虚拟地址相同,所以转换过程中内存映射不会改变</span><br><span class="line"> lgdt gdtdesc</span><br><span class="line">载入gdt</span><br><span class="line"> movl %cr0, %eax</span><br><span class="line"> orl $CR0_PE_ON, %eax</span><br><span class="line"> movl %eax, %cr0</span><br><span class="line">打开保护模式标志位,相当于按下了保护模式的开关。cr0寄存器的第0位就是这个开关,通过CR0_PE_ON或cr0寄存器,将第0位置1</span><br><span class="line"></span><br><span class="line"> # Jump to next instruction, but in 32-bit code segment.</span><br><span class="line"> # Switches processor into 32-bit mode.</span><br><span class="line"> ljmp $PROT_MODE_CSEG, $protcseg</span><br><span class="line">由于上面的代码已经打开了保护模式了,所以这里要使用逻辑地址,而不是之前实模式的地址了。</span><br><span class="line">这里用到了PROT_MODE_CSEG, 他的值是0x8。根据段选择子的格式定义,0x8就翻译成:</span><br><span class="line"> INDEX TI CPL</span><br><span class="line"> 0000 0000 1 0 00</span><br><span class="line">INDEX代表GDT中的索引,TI代表使用GDTR中的GDT, CPL代表处于特权级。</span><br><span class="line"></span><br><span class="line">PROT_MODE_CSEG选择子选择了GDT中的第1个段描述符。这里使用的gdt就是变量gdt。下面可以看到gdt的第1个段描述符的基地址是0x0000,所以经过映射后和转换前的内存映射的物理地址一样。</span><br><span class="line">.code32 # Assemble for 32-bit mode</span><br><span class="line">protcseg:</span><br><span class="line"> # Set up the protected-mode data segment registers</span><br><span class="line"> movw $PROT_MODE_DSEG, %ax # Our data segment selector</span><br><span class="line"> movw %ax, %ds # -> DS: Data Segment</span><br><span class="line"> movw %ax, %es # -> ES: Extra Segment</span><br><span class="line"> movw %ax, %fs # -> FS</span><br><span class="line"> movw %ax, %gs # -> GS</span><br><span class="line"> movw %ax, %ss # -> SS: Stack Segment</span><br><span class="line"></span><br><span class="line">重新初始化各个段寄存器。</span><br><span class="line"> # Set up the stack pointer and call into C. The stack region is from 0--start(0x7c00)</span><br><span class="line"> movl $0x0, %ebp</span><br><span class="line"> movl $start, %esp</span><br><span class="line"> call bootmain</span><br><span class="line">栈顶设定在start处,也就是地址0x7c00处,call函数将返回地址入栈,将控制权交给bootmain</span><br><span class="line"></span><br><span class="line"> # If bootmain returns (it shouldn't), loop.</span><br><span class="line">spin:</span><br><span class="line"> jmp spin</span><br><span class="line"></span><br><span class="line"># Bootstrap GDT</span><br><span class="line">.p2align 2 # force 4 byte alignment</span><br><span class="line">gdt:</span><br><span class="line"> SEG_NULLASM # null seg</span><br><span class="line"> SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg for bootloader and kernel</span><br><span class="line"> SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg for bootloader and kernel</span><br><span class="line"></span><br><span class="line">gdtdesc:</span><br><span class="line"> .word 0x17 # sizeof(gdt) - 1</span><br><span class="line"> .long gdt # address gdt</span><br></pre></td></tr></table></figure>
<p>CPU 使用全局描述符表寄存器 GDTR 保存段表起始地址。GDTR 长 48 位,其中高 32 位为基地址,低 16 位为段界限。<code>lgdt</code>指令将源操作数中的值加载到全局描述符表格寄存器 (GDTR) 中(plus : lgdt 是间接寻址的)。载入 gdt 表,就是通过<code>lgdt gdtdesc</code>指令设置 GDTR 寄存器中的内容。</p>
<p>0x17 换成 10 进制就是 23,总共就有 24 个字节。一个 GDT 表项有 64 个 bit 占 8byte。总共 3 个表项一共就有 24 个字节。基地址 32 位是 gdt 这个标号所代表数据段的地址。GDT 的第一项总是为 0, 这就确保空段选择符的逻辑地址会被认为是无效的, 因此引起一个处理器异常.</p>
<p>上面的地址一共分了两段,第一段可执行可写,第二段可读,地址空间都覆盖整个 32 位 4GB 的保护模式空间。也正如之前 uCore 介绍的没有特别采用段机制</p>
<h3 id="如何使能和进入保护模式"><a href="#如何使能和进入保护模式" class="headerlink" title="如何使能和进入保护模式"></a>如何使能和进入保护模式</h3><p>因为我们无法直接操作 CR0,所以我们首先要用一个通用寄存器来保存当前 CR0 寄存器的值。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">movl %cr0, %eax</span><br><span class="line">orl $CR0_PE_ON, %eax #CR0_PE_ON的值就是0x1</span><br><span class="line">movl %eax, %cr0 #保护模式打开</span><br><span class="line"></span><br><span class="line"># Jump to next instruction, but in 32-bit code segment.</span><br><span class="line"># Switches processor into 32-bit mode.</span><br><span class="line">ljmp $PROT_MODE_CSEG, $protcseg</span><br></pre></td></tr></table></figure>
<p>由于一些现代 CPU 特性 (乱序执行和分支预测等),在转到保护模式之后 CPU 可能仍然在跑着实模式下的代码,这显然会造成一些问题。因此必须强迫 CPU 清空一次缓冲。对此,最有效的方法就是进行一次 long jump</p>
<blockquote>
<p>ljmp <imm1>, <imm2> # %cs ← imm1 # %ip ← imm2</p>
</blockquote>
<p>由于上面的代码已经打开了保护模式了,所以这里要使用逻辑地址,而不是之前实模式的地址了。<br>这里用到了 PROT_MODE_CSEG, 他的值是 0x8。根据段选择子的格式定义,0x8 就翻译成:<br>| INDEX | TI | CPL |<br>| —– | — | — |<br>| 0000 0000 1 | 0 | 00 |<br>INDEX 代表 GDT 中的索引,TI 代表使用 GDTR 中的 GDT, CPL 代表处于特权级。</p>
<p>PROT_MODE_CSEG 选择子选择了 GDT 中的第 1 个段描述符。这里使用的 gdt 就是变量 gdt。下面可以看到 gdt 的第 1 个段描述符的基地址是 0x0000,所以经过映射后和转换前的内存映射的物理地址一样。</p>
<p>进入保护模式后,需要重新设置所有段寄存器的内容,现在 这些寄存器里面都需要保存段选择子,因为刚才的段表只把内存空间分成 2 段但这两段地址空间完全重合,实际上只有一段。所以这个时候所有的代码、数据、堆栈段都是在一个段选择子里,所以值都是 0x8,完成设置之后跳转到函数 bootmain</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line">.code32 # Assemble for 32-bit mode</span><br><span class="line">protcseg:</span><br><span class="line"> # Set up the protected-mode data segment registers</span><br><span class="line"> movw $PROT_MODE_DSEG, %ax # Our data segment selector</span><br><span class="line"> movw %ax, %ds # -> DS: Data Segment</span><br><span class="line"> movw %ax, %es # -> ES: Extra Segment</span><br><span class="line"> movw %ax, %fs # -> FS</span><br><span class="line"> movw %ax, %gs # -> GS</span><br><span class="line"> movw %ax, %ss # -> SS: Stack Segment</span><br><span class="line"></span><br><span class="line"> # Set up the stack pointer and call into C. The stack region is from 0--start(0x7c00)</span><br><span class="line"> movl $0x0, %ebp</span><br><span class="line"> movl $start, %esp</span><br><span class="line"> call bootmain</span><br></pre></td></tr></table></figure>
<h3 id="总结"><a href="#总结" class="headerlink" title="总结"></a>总结</h3><p>Bootload 的启动过程可以概括如下:</p>
<p>首先,BIOS 将第一块扇区(存着 bootloader)读到内存中物理地址为 0x7c00 的位置,同时段寄存器 CS 值为 0x0000,IP 值为 0x7c00,之后开始执行 bootloader 程序。CLI 屏蔽中断(屏蔽所有的中断:为中断提供服务通常是操作系统设备驱动程序的责任,因此在 bootloader 的执行全过程中可以不必响应任何中断,中断屏蔽是通过写 CPU 提供的中断屏蔽寄存器来完成的);CLD 使 DF 复位,即 DF=0,通过执行 cld 指令可以控制方向标志 DF,决定内存地址是增大(DF=0,向高地址增加)还是减小(DF=1,向地地址减小)。设置寄存器 ax,ds,es,ss 寄存器值为 0;A20 门被关闭,高于 1MB 的地址都默认回卷到 0,所以要激活 A20,给 8042 发命令激活 A20,8042 有两个 IO 端口:0x60 和 0x64, 激活流程: 发送 0xd1 命令到 0x64 端口 –> 发送 0xdf 到 0x60,打开 A20 门。从实模式转换到保护模式(实模式将整个物理内存看成一块区域,程序代码和数据位于不同区域,操作系统和用户程序并没有区别对待,而且每一个指针都是指向实际的物理地址,地址就是 IP 值。这样,用户程序的一个指针如果指向了操作系统区域或其他用户程序区域,并修改了内容,那么其后果就很可能是灾难性的),所以就初始化全局描述符表使得虚拟地址和物理地址匹配可以相互转换;lgdt 汇编指令把通过 gdt 处理后的(asm.h 头文件中处理函数)描述符表的起始位置和大小存入 gdtr 寄存器中;将 CR0 的第 0 号位设置为 1,进入保护模式;指令跳转由代码段跳到 protcseg 的起始位置。设置保护模式下数据段寄存器;设置堆栈寄存器并调用 bootmain 函数</p>
<h2 id="分析-bootloader-加载-ELF-格式的-OS-的过程"><a href="#分析-bootloader-加载-ELF-格式的-OS-的过程" class="headerlink" title="分析 bootloader 加载 ELF 格式的 OS 的过程"></a>分析 bootloader 加载 ELF 格式的 OS 的过程</h2><p>通过执行<code>readseg((uintptr_t)ELFHDR, SECTSIZE * 8, 0)</code>将 os 代码的 ELF 头读到内存中来。SECTSIZE=512 是扇区大小,ELFHDR 是存储 ELF 头格式的 OS 的地址。从磁盘 0 地址处(里面不算启动扇区,实际上是从第 1 个扇区开始读)读 SECTSIZE * 8 (8 个扇区) 读到 ELFHDR 这个地址上。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">void</span> <span class="title">readseg</span><span class="params">(<span class="keyword">uintptr_t</span> va, <span class="keyword">uint32_t</span> count, <span class="keyword">uint32_t</span> offset)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="keyword">uintptr_t</span> end_va = va + count; <span class="comment">//终止地址</span></span><br><span class="line"></span><br><span class="line"> <span class="comment">//要读只能一个扇区一起读,将地址va与扇区开始处对齐,这样如果从对齐(减小过的va开始读取)的话,读完后没减过的实际传入没修改过的va就是我们想要的中间内容(注意va是传值调用进来的)</span></span><br><span class="line"> va -= offset % SECTSIZE;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">uint32_t</span> secno = (offset / SECTSIZE) + <span class="number">1</span>; <span class="comment">//算一算从哪个扇区开始</span></span><br><span class="line"></span><br><span class="line"> <span class="keyword">for</span> (; va < end_va; va += SECTSIZE, secno++)</span><br><span class="line"> {</span><br><span class="line"> readsect((<span class="keyword">void</span> *)va, secno);</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>每一个扇区读取的代码<code>dst</code>指示该扇区要读到的内存地址,<code>secno</code>指示读取磁盘的哪一个扇区。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">void</span> <span class="title">readsect</span><span class="params">(<span class="keyword">void</span> *dst, <span class="keyword">uint32_t</span> secno)</span></span></span><br><span class="line"><span class="function"></span>{</span><br><span class="line"> <span class="comment">// wait for disk to be ready</span></span><br><span class="line"> waitdisk();</span><br><span class="line"></span><br><span class="line"> outb(<span class="number">0x1F2</span>, <span class="number">1</span>); <span class="comment">// 读几个扇区</span></span><br><span class="line"> outb(<span class="number">0x1F3</span>, secno & <span class="number">0xFF</span>); <span class="comment">//以下几个寄存器放扇区编号</span></span><br><span class="line"> outb(<span class="number">0x1F4</span>, (secno >> <span class="number">8</span>) & <span class="number">0xFF</span>);</span><br><span class="line"> outb(<span class="number">0x1F5</span>, (secno >> <span class="number">16</span>) & <span class="number">0xFF</span>);</span><br><span class="line"> outb(<span class="number">0x1F6</span>, ((secno >> <span class="number">24</span>) & <span class="number">0xF</span>) | <span class="number">0xE0</span>);</span><br><span class="line"> outb(<span class="number">0x1F7</span>, <span class="number">0x20</span>); <span class="comment">// 操作内容,要求读扇区</span></span><br><span class="line"></span><br><span class="line"> <span class="comment">// wait for disk to be ready</span></span><br><span class="line"> waitdisk();</span><br><span class="line"></span><br><span class="line"> <span class="comment">// read a sector</span></span><br><span class="line"> insl(<span class="number">0x1F0</span>, dst, SECTSIZE / <span class="number">4</span>);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>读取硬盘扇区的步骤:</p>
<ol>
<li><p>等待硬盘空闲。waitdisk 的函数实现只有一行:while ((inb(0x1F7) & 0xC0) != 0x40),意思是不断查询读 0x1F7 寄存器的最高两位,直到最高位为 0、次高位为 1(这个状态应该意味着磁盘空闲)才返回。</p>
</li>
<li><p>硬盘空闲后,发出读取扇区的命令。对应的命令字为 0x20,放在 0x1F7 寄存器中;读取的扇区数为 1,放在 0x1F2 寄存器中;读取的扇区起始编号共 28 位,分成 4 部分依次放在 0x1F3~0x1F6 寄存器中。</p>
</li>
<li><p>发出命令后,再次等待硬盘空闲。</p>
</li>
<li><p>硬盘再次空闲后,开始从 0x1F0 寄存器中读数据。注意 insl 的作用是”That function will read cnt dwords from the input port specified by port into the supplied output array addr.”,是以 dword 即 4 字节为单位的,因此这里 SECTIZE 需要除以 4.</p>
</li>
</ol>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> <span class="keyword">inline</span> <span class="keyword">void</span></span><br><span class="line">insl(<span class="keyword">uint32_t</span> port, <span class="keyword">void</span> *addr, <span class="keyword">int</span> cnt) {</span><br><span class="line"> <span class="function"><span class="keyword">asm</span> <span class="title">volatile</span> <span class="params">(</span></span></span><br><span class="line"><span class="function"><span class="params"> <span class="string">"cld;"</span></span></span></span><br><span class="line"><span class="function"><span class="params"> <span class="string">"repne; insl;"</span></span></span></span><br><span class="line"><span class="function"><span class="params"> : <span class="string">"=D"</span> (addr), <span class="string">"=c"</span> (cnt)</span></span></span><br><span class="line"><span class="function"><span class="params"> : <span class="string">"d"</span> (port), <span class="string">"0"</span> (addr), <span class="string">"1"</span> (cnt)</span></span></span><br><span class="line"><span class="function"><span class="params"> : <span class="string">"memory"</span>, <span class="string">"cc"</span>)</span></span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>读取完 8 个扇区的操作系统后开始解析:</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">// is this a valid ELF?</span></span><br><span class="line"><span class="keyword">if</span> (ELFHDR->e_magic != ELF_MAGIC)</span><br><span class="line">{</span><br><span class="line"> <span class="keyword">goto</span> bad;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="comment">//elf文件中的program header table</span></span><br><span class="line"><span class="class"><span class="keyword">struct</span> <span class="title">proghdr</span> *<span class="title">ph</span>, *<span class="title">eph</span>;</span></span><br><span class="line"></span><br><span class="line"><span class="comment">// phoff: program header 表的起始位置偏移</span></span><br><span class="line">ph = (struct proghdr *)((<span class="keyword">uintptr_t</span>)ELFHDR + ELFHDR->e_phoff);</span><br><span class="line"></span><br><span class="line"><span class="comment">// phnum: program header 表中的入口数目</span></span><br><span class="line"><span class="comment">// program header 表是一个program header结构的数组, 每个结构描述了一个段或者系统准备程序执行所必需的其它信息</span></span><br><span class="line"><span class="comment">// eph 就是该表的终止位置</span></span><br><span class="line">eph = ph + ELFHDR->e_phnum;</span><br><span class="line"></span><br><span class="line"><span class="keyword">for</span> (; ph < eph; ph++)</span><br><span class="line">{</span><br><span class="line"> <span class="comment">//p_va virtual address to map segment</span></span><br><span class="line"> <span class="comment">//p_memsz size of segment in memory (bigger if contains bss)</span></span><br><span class="line"> <span class="comment">//p_offset file offset of segment由于开始的地址是0,该偏移就是在磁盘中的实际地址</span></span><br><span class="line"> readseg(ph->p_va & <span class="number">0xFFFFFF</span>, ph->p_memsz, ph->p_offset);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="comment">// call the entry point from the ELF header</span></span><br><span class="line"><span class="comment">// note: does not return</span></span><br><span class="line">((<span class="keyword">void</span> (*)(<span class="keyword">void</span>))(ELFHDR->e_entry & <span class="number">0xFFFFFF</span>))();</span><br></pre></td></tr></table></figure>
<p>用工具读 kern 可以看到地址前面两位为空<code>& 0xFFFFFF</code>的作用就是截取后面的 24 位地址,还有注意<code>ELFHDR->e_entry</code>的值为<code>0x100000</code>,<code>ELFHDR</code>的地址是<code>0x10000</code>,少一个 0,不是一个地址。</p>
<p><img src="https://md.lagrange.plus/uploads/upload_9f1f9e574d09eab7da5d82fd3636b561.png" alt="kern"></p>
<h2 id="实现函数调用堆栈跟踪函数"><a href="#实现函数调用堆栈跟踪函数" class="headerlink" title="实现函数调用堆栈跟踪函数"></a>实现函数调用堆栈跟踪函数</h2><p>几乎所有本地编译器都会在每个函数体之前插入类似如下的汇编指令:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">pushl %ebp</span><br><span class="line">movl %esp , %ebp</span><br></pre></td></tr></table></figure>
<p>这样在程序执行到一个函数的实际指令前,已经有以下数据顺序入栈:参数、返回地址、ebp 寄存器。由此得到类似如下的栈结构(参数入栈顺序跟调用方式有关,这里以 C 语言默认的 CDECL 为例):</p>
<p><img src="https://md.lagrange.plus/uploads/upload_11fd42ee568230926dd27f08d50957e6.png" alt="stack"></p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">void</span></span><br><span class="line">print_stackframe(<span class="keyword">void</span>) {</span><br><span class="line"> <span class="comment">/* LAB1 YOUR CODE : STEP 1 */</span></span><br><span class="line"> <span class="comment">/* (1) call read_ebp() to get the value of ebp. the type is (uint32_t);</span></span><br><span class="line"><span class="comment"> * (2) call read_eip() to get the value of eip. the type is (uint32_t);</span></span><br><span class="line"><span class="comment"> * (3) from 0 .. STACKFRAME_DEPTH</span></span><br><span class="line"><span class="comment"> * (3.1) printf value of ebp, eip</span></span><br><span class="line"><span class="comment"> * (3.2) (uint32_t)calling arguments [0..4] = the contents in address (uint32_t)ebp +2 [0..4]</span></span><br><span class="line"><span class="comment"> * (3.3) cprintf("\n");</span></span><br><span class="line"><span class="comment"> * (3.4) call print_debuginfo(eip-1) to print the C calling function name and line number, etc.</span></span><br><span class="line"><span class="comment"> * (3.5) popup a calling stackframe</span></span><br><span class="line"><span class="comment"> * NOTICE: the calling funciton's return addr eip = ss:[ebp+4]</span></span><br><span class="line"><span class="comment"> * the calling funciton's ebp = ss:[ebp]</span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">uint32_t</span> ebp = read_ebp(), eip = read_eip();</span><br><span class="line"></span><br><span class="line"> <span class="keyword">int</span> i, j;</span><br><span class="line"> <span class="comment">//#define STACKFRAME_DEPTH 20</span></span><br><span class="line"> <span class="keyword">for</span> (i = <span class="number">0</span>; ebp != <span class="number">0</span> && i < STACKFRAME_DEPTH; i ++) {</span><br><span class="line"> cprintf(<span class="string">"ebp:0x%08x eip:0x%08x args:"</span>, ebp, eip);</span><br><span class="line"> <span class="keyword">uint32_t</span> *args = (<span class="keyword">uint32_t</span> *)ebp + <span class="number">2</span>;</span><br><span class="line"> <span class="keyword">for</span> (j = <span class="number">0</span>; j < <span class="number">4</span>; j ++) {</span><br><span class="line"> cprintf(<span class="string">"0x%08x "</span>, args[j]);</span><br><span class="line"> }</span><br><span class="line"> cprintf(<span class="string">"\n"</span>);</span><br><span class="line"> print_debuginfo(eip - <span class="number">1</span>);</span><br><span class="line"> eip = ((<span class="keyword">uint32_t</span> *)ebp)[<span class="number">1</span>];</span><br><span class="line"> ebp = ((<span class="keyword">uint32_t</span> *)ebp)[<span class="number">0</span>];</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>首先定义两个局部变量 ebp、esp 分别存放 ebp、esp 寄存器的值。这里将 ebp 定义为指针,是为了方便后面取 ebp 寄存器的值。</p>
<p>调用 read_ebp 函数来获取执行 print_stackframe 函数时 ebp 寄存器的值,这里 read_ebp 必须定义为 inline 函数,否则获取的是执行 read_ebp 函数时的 ebp 寄存器的值。</p>
<p>调用 read_eip 函数来获取当前指令的位置,也就是此时 eip 寄存器的值。这里 read_eip 必须定义为常规函数而不是 inline 函数,因为这样的话在调用 read_eip 时会把当前 ebp 压栈,把 ebp 设置为 eip,故只要读调用函数后的 ebp 就可得到当前 eip 的值。</p>
<p>由于变量 eip 存放的是下一条指令的地址,因此将变量 eip 的值减去 1,得到的指令地址就属于当前指令的范围了。由于只要输入的地址属于当前指令的起始和结束位置之间,print_debuginfo 都能搜索到当前指令,因此这里减去 1 即可。</p>
<p>以后变量 eip 的值就不能再调用 read_eip 来获取了(每次调用获取的值都是相同的),而应该从 ebp 寄存器指向栈中的位置再往上一个单位中获取。这个地址指向上一个栈帧的最后入栈的元素。</p>
<p>由于 ebp 寄存器指向栈中的位置存放的是调用者的 ebp 寄存器的值,把现在的地址更新为这个地址里面存储的内容,据此可以继续顺藤摸瓜,不断回溯,直到 ebp 寄存器的值变为 0</p>
<h2 id="int-0x80系统调用实现"><a href="#int-0x80系统调用实现" class="headerlink" title="int 0x80系统调用实现"></a><code>int 0x80</code>系统调用实现</h2><p>CS 作为段基址寄存器储存着段选择子,里面包含 GDT 表的偏移以及当前的特权级 CPL。对于用户态程序来说 CPL 一般为 3,内核段的 DPL 都是 0,无法直接访问内核的数据与代码,若需要访问则需要通过中断实现。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> (i = <span class="number">0</span>; i < <span class="keyword">sizeof</span>(idt) / <span class="keyword">sizeof</span>(struct gatedesc); i ++) {</span><br><span class="line"> SETGATE(idt[i], <span class="number">0</span>, GD_KTEXT, __vectors[i], DPL_KERNEL);</span><br><span class="line">}</span><br><span class="line"><span class="comment">// set for switch from user to kernel</span></span><br><span class="line">SETGATE(idt[T_SWITCH_TOK], <span class="number">0</span>, GD_KTEXT, __vectors[T_SWITCH_TOK], DPL_USER);</span><br></pre></td></tr></table></figure>
<p>在设置中断向量表 IDT 的时候,故意将<code>T_SWITCH_TOK=0x79</code>(实际是第 0x80 项)的 DPL 设置为用户态权限 3,其余都设置为内核态 0.<br><img src="https://md.lagrange.plus/uploads/upload_85d13f38c3c4636e196ba2a53b33436f.png" alt="IDT"></p>
<p>每个 IDT 表项如上图所示,当一个程序引发 0x80 中断时,CPU 通过硬件检查 IDT 中对应的 DPL 特权级,发现为 3 可以访问,然后将对应的段选择符和入口偏移装入 CS:IP,该段选择符的 CPL 为 0,即进入内核,可以通过系统调用操作一些内存数据。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/01/24/Lab2/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="lagrange">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="lagrange's blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/01/24/Lab2/" class="post-title-link" itemprop="url">Lab2 实验报告</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>