-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1386 lines (973 loc) · 151 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.2.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
<link rel="mask-icon" href="/images/logo.svg" color="#222">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&display=swap&subset=latin,latin-ext">
<link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">
<script id="hexo-configurations">
var NexT = window.NexT || {};
var CONFIG = {"hostname":"keepalive555.github.io","root":"/","scheme":"Pisces","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":false,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":false},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}}};
</script>
<meta name="description" content="技术博客">
<meta property="og:type" content="website">
<meta property="og:title" content="枫叶居">
<meta property="og:url" content="https://keepalive555.github.io/index.html">
<meta property="og:site_name" content="枫叶居">
<meta property="og:description" content="技术博客">
<meta property="og:locale" content="zh_CN">
<meta property="article:author" content="Lan Wei, Wang">
<meta name="twitter:card" content="summary">
<link rel="canonical" href="https://keepalive555.github.io/">
<script id="page-configurations">
// https://hexo.io/docs/variables.html
CONFIG.page = {
sidebar: "",
isHome : true,
isPost : false,
lang : 'zh-CN'
};
</script>
<title>枫叶居</title>
<noscript>
<style>
.use-motion .brand,
.use-motion .menu-item,
.sidebar-inner,
.use-motion .post-block,
.use-motion .pagination,
.use-motion .comments,
.use-motion .post-header,
.use-motion .post-body,
.use-motion .collection-header { opacity: initial; }
.use-motion .site-title,
.use-motion .site-subtitle {
opacity: initial;
top: initial;
}
.use-motion .logo-line-before i { left: initial; }
.use-motion .logo-line-after i { right: initial; }
</style>
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage">
<div class="container use-motion">
<div class="headband"></div>
<header class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-container">
<div class="site-nav-toggle">
<div class="toggle" aria-label="切换导航栏">
<span class="toggle-line toggle-line-first"></span>
<span class="toggle-line toggle-line-middle"></span>
<span class="toggle-line toggle-line-last"></span>
</div>
</div>
<div class="site-meta">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<h1 class="site-title">枫叶居</h1>
<span class="logo-line-after"><i></i></span>
</a>
<p class="site-subtitle" itemprop="description">桃李春风一杯酒,江湖夜雨十年灯</p>
</div>
<div class="site-nav-right">
<div class="toggle popup-trigger">
</div>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="main-menu menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section"><i class="fa fa-tags fa-fw"></i>标签</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section"><i class="fa fa-th fa-fw"></i>分类</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档</a>
</li>
<li class="menu-item menu-item-sitemap">
<a href="/baidusitemap.xml" rel="section"><i class="fa fa-sitemap fa-fw"></i>站点地图</a>
</li>
<li class="menu-item menu-item-about">
<a href="/about/" rel="section"><i class="fa fa-user fa-fw"></i>关于</a>
</li>
</ul>
</nav>
</div>
</header>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
<span>0%</span>
</div>
<main class="main">
<div class="main-inner">
<div class="content-wrap">
<div class="content index posts-expand">
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/" class="post-title-link" itemprop="url">服务雪崩历险记(上)</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-04-10 22:09:03" itemprop="dateCreated datePublished" datetime="2021-04-10T22:09:03+08:00">2021-04-10</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2021-04-11 01:59:55" itemprop="dateModified" datetime="2021-04-11T01:59:55+08:00">2021-04-11</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84/" itemprop="url" rel="index"><span itemprop="name">系统架构</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1><span id="服务雪崩历险记上">服务雪崩历险记(上)</span></h1><p>服务雪崩——作为微服务架构中的经典问题,之前只是在技术博客中看到过,没想到自己有一天也遇到了,由于首次处理此类问题,经验较为欠缺,走了一些弯路,在此记录<strong>排查思路</strong>与<strong>解决方案</strong>,<strong>服务雪崩</strong>的概念可以参考网上技术文章,在此不做过多赘述。</p>
<h1><span id="线上现象">线上现象</span></h1><p>今天上午刚刚到公司,便收到<font color="#ff0000">【天气服务】CPU使用率超限报警</font>,上午一般是百度APP流量低峰期,因此笔者感觉比较奇怪,于是便打开报警链接,发现<strong>北京机房</strong>实例CPU使用率达到了惊人的<strong>245%**,远远超过了</strong>70%**的阈值(事后非常庆幸,笔者撰文时所有容器已开启资源硬限,雪崩发生时尚未开启CPU资源硬限,否则服务可用性可能会跌到个位数=_=),如图所示:</p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/cpu.png" alt="CPU使用率"></p>
<p>笔者第一时间打开业务监控,查看接口监控指标。如下图,可以明显看到接口流量有若干个波峰,最高的波峰上涨了约80%(8000Q左右)。<font color="#ff0000">情理之中,意料之外的是——接口可用性、接口平响同样出现若干个波峰,依赖的所有下游服务可用性均下跌严重</font>。很明显【天气服务】出现了雪崩的迹象,但全线上涨的监控指标,掩盖了问题发生的根源,无从下手定位问题原因。<strong>雪崩的时候没有一片雪花是无辜的。</strong></p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/qps.png" alt="流量"></p>
<h1><span id="问题定位">问题定位</span></h1><h2><span id="初步排查">初步排查</span></h2><p>因为无【服务雪崩】相关排查与处理经验,未能直击要害点。根据<strong>【止损优先】</strong>的原则,笔者凭直觉(经验),先列出可能的原因,尝试止损:</p>
<ol>
<li>Go模块未知Bug被触发(例如:Goroutine泄露、full GC),导致CPU使用率急剧飙升,流量急剧升高,下游服务可用性急剧变差;</li>
<li>端发生大规模崩溃,频繁重启,流量上涨,导致CPU过载;</li>
<li>下游服务不稳定,导致【天气接口】大量超时,触发上游服务重试,导致CPU使用率飙升;</li>
</ol>
<p>首先,选择三台问题实例(容器),查看<strong>实例CPU使用率</strong>监控指标,三台实例CPU使用率波峰出现时间点完全一致,且且内存使用率、已打开文件描述符等等监控均未见异常,基本排除实例内Go模块未知Bug引起CPU使用率飙升,进而引起连锁反应可能性。深入跟踪,查看三台实例所在物理机的CPU使用率(未开启资源硬限,混布服务存在资源侵占的可能性),CPU IDLE指标较高,计算资源充裕,排除服务混布,其它服务实例资源占用的可能性(**<em>注:CPU使用率升高与流量升高,是个鸡生蛋蛋生鸡的问题,CPU使用率升高,会导致部分请求处理不及时,引发上游服务重试,导致流量上涨、可用性下跌等等,所以不能武断**</em>)。综合实例CPU使用率与物理机CPU使用率,基本排除CPU性能瓶颈(延伸问题:若容器开启硬限制,如何排除)。</p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/shili_cpu.png" alt="实例CPU使用率图"></p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/jiqi.png" alt="物理机CPU使用率图"></p>
<p>端发生大规模崩溃,【天气服务】接口流量会上涨,调用的其它接口都会比较明显的上涨。但观察其它接口正常,端崩溃率监控正常,可能性2排除。</p>
<p>在可能性1与可能性2基本排除后,**<em>笔者几乎可以肯定是下游服务可用性出现波动,拖累天气服务超时,触发接入层重试**</em>,但监控中所有下游服务可用性(上游服务调用下游成功率)均大幅度下跌,无参考价值。分析线上RPC请求日志,失败原因均为”request canceled (client.timeout exceeded while awaiting header”,即<strong>读超时</strong>。一般情况下,因CPU使用率升高导致请求下游服务失败,在建立连接阶段就失败了,此处发生的错误为读超时,更加坚定笔者的判断。<font color="#ff0000">看上去真凶呼之欲出了,诡异的打脸马上来了,询问了所有下游业务方,业务方均反馈服务流量有上涨,但服务可用性、平响、CPU使用率均正常</font>。</p>
<h2><span id="尝试止损">尝试止损</span></h2><p>虽然尚未完全定位原因,但基本可以确定流量上涨的原因为【天气服务】上游服务重试导致的,『请求重试』加重了系统的负载,移除上游服务的重试应当是有效的。【天气服务】架构图如下(基于安全考虑,已屏蔽细节,实际架构有出入,不影响理解):</p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/xuebeng.png" alt="架构图"></p>
<p>可以看到【流量网关】 => 【业务网关】=> 【天气服务】=> 【下游服务】存在3层重试,假如相邻的上下两层,请求超时,触发重试,到【下游服务】的流量最大会被放大至正常流量的8倍(2^3=8),很容易发生<strong>服务雪崩</strong>。流量网关,因接入了众多产品线,摘除『重试』风险较大,在【业务网关】与【天气服务】这两层,将『重试』逻辑移除。<strong>【天气服务】与【下游服务】流量逐渐下降,CPU使用率开始逐渐下降,止损操作已生效</strong>。</p>
<h2><span id="问题定位">问题定位</span></h2><p>摘除【业务网关】与【天气服务】的请求『重试』之后,【天气服务】流量趋于正常。因为【流量网关】『重试』尚未摘除,【天气服务】流量波峰与CPU波峰依然存在,参考价值依然不大。下游服务较为明显,陆续恢复正常,除了——<strong>Push地址位置同步服务</strong>。<font color="#ff0000">显然该服务是此次雪崩的真凶,那么为什么业务方观察的现业务可用性、平响、CPU使用率均正常,如此诡异呢</font>。</p>
<h3><span id="长尾请求">长尾请求</span></h3><p>笔者在将【天气服务】请求Push地址位置同步服务Timeout时间调小进行止损的同时,开始分析原因,因为RPC报错日志大多数是读超时,首先想到的便是存在长尾请求(有关长尾请求与分位时参考笔者上一篇博文:<a href="https://keepalive555.github.io/2020/09/24/%E9%95%BF%E5%B0%BE%E8%AF%B7%E6%B1%82/%EF%BC%89%EF%BC%8C%E7%BB%9F%E8%AE%A1RPC%E6%97%A5%E5%BF%9785%E5%88%86%E4%BD%8D%E6%97%B6%E5%A6%82%E4%B8%8B%EF%BC%88%E5%AE%9E%E9%99%85%E4%B8%8A%E8%AF%B7%E6%B1%82%E8%AF%A5%E6%9C%8D%E5%8A%A1%E7%9A%84RPC%E6%97%A5%E5%BF%9750%E5%88%86%E4%BD%8D%E6%97%B6%E4%B9%9F%E5%B0%86%E8%BF%91400ms%EF%BC%89%EF%BC%9A">https://keepalive555.github.io/2020/09/24/%E9%95%BF%E5%B0%BE%E8%AF%B7%E6%B1%82/),统计RPC日志85分位时如下(实际上请求该服务的RPC日志50分位时也将近400ms):</a></p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/fenweishi.png" alt="85分位时"></p>
<p>而业务方监控平响峰值在5ms以内(如图所示),即使RPC日志中前50%请求耗时为0ms,接口平响也有200ms,相差40倍。于是笔者与业务方RD开始梳理请求全链路,查找线索。</p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/mapisss.png" alt="!mapi平响"></p>
<h3><span id="请求链路梳理">请求链路梳理</span></h3><p>与业务方RD沟通后得知,下游业务采用『Nginx+PHP』的部署方式,每台实例前端均部署一台Nginx用作反向代理, 若干PHP进程(线程)处理请求。请求链路:【天气服务】=> 【实例Nginx】=>【业务方PHP进程】。业务方RD在Nginx日志中发现了大量HTTP 499错误码的请求,HTTP 499表示客户端因请求超时关闭请求,与【天气服务】RPC日志表现一致,耗时500ms,问题基本定位——**<em>495ms的平响时间差是在【实例Nginx】转发至【业务方PHP进程】的过程中产生的**</em>。</p>
<p>由于Nginx日志可供参考的信息有限,在OP同学的帮助下最终定位了原因——PHP的虚拟机Worker线程处理能力达到了上限,后续到达的请求排队等候处理,直至超时,**<em>类似于限流算法中的漏桶算法**</em>。<font color="#ff0000">这也是业务服务可用性、平响、CPU使用率均正常这种诡异现象的原因</font>。</p>
<p><img src="/2021/04/10/%E6%9C%8D%E5%8A%A1%E9%9B%AA%E5%B4%A9/queuss.png" alt="队列堆积"></p>
<h3><span id="导火索">导火索</span></h3><p>原因已定位,那导致请求堆积的罪魁祸首是什么呢——Push消息推送。年底运营活动较多,通过Push渠道进入百度APP的用户变多,下游业务吞吐量见顶。</p>
<h3><span id="事件复盘">事件复盘</span></h3><p>“重试”属于控制论中的“正反馈”,会逐渐增强“”活动“——“雪崩”触发”重试”,“重试”强化“雪崩”程度,所以若发生“服务雪崩”可以且应当首先考虑调整“重试”策略。此次【服务雪崩】发生的逻辑链如下:</p>
<ol>
<li>年底各业务方运营活动增多,Push推送频繁,“Push集群”流量逐渐上涨</li>
<li>“Push集群”实例PHP虚拟机Worker线程全部被占用,并发处理能力达到上限</li>
<li>“天气服务”请求“Push服务”,PHP虚拟机在处理其它请求,请求排队,读超时,此次请求失败</li>
<li>“天气服务”请求”Push服务”超时,触发RPC请求重试,“天气服务”再次请求“Push服务”</li>
<li>“天气请求”整体处理超时,触发“天气服务”上游“业务网关”重试策略,发起天气请求</li>
<li>“天气服务”再次对所有“下游服务”发起请求,流量被放大到至4倍</li>
<li>因为下游所有服务负载加大,“业务网关”处理”天气请求”超时,触发“流量网关”请求重试</li>
<li>“天气服务”再次对所有“下游服务”发起请求,流量被放大到至8倍</li>
<li>“天气服务”所有下游服务流量上涨、可用性均下跌、平响升高</li>
<li>“服务雪崩”ಥ_ಥ</li>
</ol>
<h1><span id="服务雪崩解决方案">服务雪崩解决方案</span></h1><p>由于【天气服务】是由PHP模块迁移而来,尚未接入手百的Service Mesh,所以止损方案有限。服务雪崩是微服务架构中常见的问题,解决方案也比较成熟,笔者在下一篇博文中讲述,常见方案参考:</p>
<ul>
<li>入口流量限流</li>
<li>访问下游服务增加断路器</li>
<li>异步请求弱依赖服务</li>
<li>Locality-aware load balancing路由算法</li>
<li>……</li>
</ul>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2020/09/24/%E9%95%BF%E5%B0%BE%E8%AF%B7%E6%B1%82/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2020/09/24/%E9%95%BF%E5%B0%BE%E8%AF%B7%E6%B1%82/" class="post-title-link" itemprop="url">长尾请求与分位时</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2020-09-24 00:44:18" itemprop="dateCreated datePublished" datetime="2020-09-24T00:44:18+08:00">2020-09-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2020-10-22 00:07:00" itemprop="dateModified" datetime="2020-10-22T00:07:00+08:00">2020-10-22</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96/" itemprop="url" rel="index"><span itemprop="name">性能优化</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1><span id="写在前面">写在前面</span></h1><p>本文章为笔者原创,转载需要表明出处,联系作者:<a href="mailto:luckydreamcatcher@163.com">luckydreamcatcher@163.com</a> | <a href="mailto:the.matrix.vvv@gmail.com">the.matrix.vvv@gmail.com</a></p>
<p>QA同学在线上测试重构后的<strong>golang模块</strong>时发现,会偶现后端响应<strong>超时</strong>的现象。在之前的压测中,接口监控<strong>响应</strong>稳定在10ms左右,所以猜测存在<strong>长尾请求</strong>。</p>
<h1><span id="目前问题">目前问题</span></h1><h2><span id="监控指标">监控指标</span></h2><p>目前业务监控系统,反应接口耗时的系统指标为——<strong>平响</strong>,即平均响应时间=<strong>单位时间内所有请求耗时总和/请求数</strong>。</p>
<p>平均数并不能够反应数据的波动情况,例如:请求a耗时10ms(记为cost(a)=10ms),请求b耗时300ms(记为cost(b)=300ms),请求a与请求b的平均响应时间= cost(a, b) = (cost(a) + cost(b)) / 2 =<strong>155ms</strong> 。平均耗时155ms(<=200ms)是达标的,但是请求b耗时300ms明显是未达标的。</p>
<p>APP后端研发工程师,都了解对端接口请求耗时<strong>200ms</strong>是一个临界阈值——请求耗时200ms以下,用户对网络延迟几乎无感,体验较好,请求耗时200ms以上,网络延迟感明显,用户体验较差。因此请求耗时是否<=200ms经常作为接口性能优化的判断条件之一。在业务中,经常会遇到<strong>命中缓存</strong>与<strong>未命中缓存</strong>时耗时差距较大的场景,<strong>所以平响无法全面的衡量系统的性能</strong>。</p>
<h2><span id="长尾请求">长尾请求</span></h2><p>业界关于延迟有一个常用的<a target="_blank" rel="noopener" href="https://stackoverflow.com/questions/12808934/what-is-p99-latency">P99标准</a>,即99%的请求应该比指定的延迟更小,仅允许1%的请求大于指定的延迟,这1%的请求即为”长尾请求”。打个形象的比喻,班级内99%同学的成绩都非常优秀,但总会有几位同学拖班级平均成绩后腿儿,拉低班级的“平均分,这几位同学就是“长尾请求”。</p>
<p>长尾请求的产生原因是多种多样的且复杂的,包括实现方式、系统因素、硬件因素等等,在分布式中常见原因如下:</p>
<ul>
<li>依赖的下游服务有波动;</li>
<li>资源竞争(包括:文件、锁、硬件资源);</li>
<li>网络波动;</li>
<li>机器负载较大,系统调度,排队;</li>
<li>fullGC;</li>
<li>CPU降低功率控制温度;</li>
</ul>
<p>有关长尾请求更多介绍于技术优化思路,参考Google Jeff Dean大神的论文:<a target="_blank" rel="noopener" href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.732.6087&rep=rep1&type=pdf%E3%80%82">http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.732.6087&rep=rep1&type=pdf。</a></p>
<p>长尾请求在某种意义上来讲是无法消除的,但是我们可以通过技术手段将长尾请求控制在一定的比例之内,<strong>因此长尾请求也是很多性能优化工作的关注重点</strong>。由于长尾请求的存在,<strong>平响</strong>指标无法很好的反应绝大多数请求的耗时情况,因此有了<strong>分位时</strong>的概念,通俗的理解就是xx%的耗时在多少之内。</p>
<h2><span id="分位时">分位时</span></h2><h3><span id="概念介绍">概念介绍</span></h3><p>分位数,是统计学的一个术语,概念如下:</p>
<blockquote>
<p>百分位数又称百分位分数(percentile),是一种相对地位量数,它是次数分布(Frequency Distribution,频数分布)中的一个点。把一个次数分布排序后,分为 100 个单位,百分位数就是次数分布中相对于某个特定百分点的原始分数,它表明在次数分布中特定个案百分比低于该分数。</p>
</blockquote>
<p>通俗的讲,<strong>将数据按照升序(或降序)排列,等分为100份,在P=0.9(即99%)位置的数是多少</strong>。例如:全校800名学生,80分位数,指80%的学生考分在多少分以上,我们可以这样计算:</p>
<ol>
<li>将800名学生成绩,按照从高到低的降序排列;</li>
<li>800名同学80%的名次为:800 * 80% = 640;</li>
<li>全校成绩排名第640名的学生成绩即我们所需的80分位数;</li>
</ol>
<p>现实中,存在<code>total(总数) * percent(百分比)</code>为浮点数的情况,例如9名学生的分数分别为:100,88,89,90,95,70,65,78,79,求90分位数,按照上述思路来计算:</p>
<ol>
<li>将9名学生成绩,按照从高到低的升序排列为:100, 95, 90, 89, 88, 79, 78, 70, 65;</li>
<li>9名同学90%的名次为:9 * 90% = 8.1;</li>
</ol>
<p>问题来了,第8.1名学生的成绩为多少?显然不存在第8.1名学生,假如存在的话,那么第8.1名学生的成绩一定在第8名与第9名之间。拆开来看,第8.1名学生成绩等价于在第8名学生成绩基础上,加上第9名与第8名成绩之差乘以10%=score(8)+(score(9)-score(8))*10% = 70 + (65 - 70)*10% =69.50,即这9名学生的90分位数为69.50分(注意:假设第9名与第8名成绩区间是分布均匀的,实际上样本数量较少时波动比较大,随着样本数量变大趋向于均匀)。</p>
<p>总结分位数计算规则如下:</p>
<ol>
<li>将输入数组升序/降序排列,数组长度为n;</li>
<li>求数组[0, n)的P%的下标,m = n*P% - 1 = i + j,i代表整数部分,j代表小数部分;</li>
<li>求下标为m的元素值 f(m) = f(i) + (f(j) - f(i)) * j;</li>
</ol>
<p>参考上述,可得分位时,是将所有请求耗时由小至大升序排列,求得分位数。</p>
<h3><span id="计算工具">计算工具</span></h3><p>计算分位时的工具,可参考笔者写的简易<a target="_blank" rel="noopener" href="https://github.com/keepalive555/victorinox/blob/main/src/percentile.py">Python脚本</a></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">curl -L -O https://raw.githubusercontent.com/keepalive555/victorinox/main/src/percentile.py</span><br></pre></td></tr></table></figure>
<p>求一批请求耗时的99分位时,Linux示例命令如下:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">cat service.log|grep -o -P <span class="string">"cost\[\d+(\.|\])?"</span>|grep -o -P <span class="string">"\d+"</span>|./percentile.py</span><br></pre></td></tr></table></figure>
<p>在笔者的案例中,取生产环境日志约<strong>10w</strong>条,求得重构后<code>golang</code>接口,99.9分位时为200ms,平响为10ms,差距是要比想想中的要大的多,所以关注系统性能指标不只需要关注<strong>平响</strong>,也需要关注<strong>分位时</strong>。</p>
<h2><span id="优化思路">优化思路</span></h2><p>长尾请求的产生原因是多种多样的,分布式系统中<strong>最常见的</strong>场景是受下游服务拖累,例如:MySQL慢查询、分布式缓存过期、下游服务过载等等,合理设置下游服务超时时间是非常有必要的。</p>
<p>目前许多流行的RPC框架,提供了解决长尾请求的方案——<code>Backup Request</code>,例如百度内部的BRPC框架。客户端首先向一台下游服务Server发送RPC请求,若在<code>backup_request_ms</code>(通常小于超时时间)内未取到数据,则在向下游服务另外一台Server发送RPC请求,哪台Server先响应则取哪条。设置合理的<code>backup_request_ms</code>,大部分情况下只会发一个请求,对下游服务的压力可以不计。</p>
<p>目前了解到,百度小程序C端团队,在做<code>BackupRequest</code>的改造,准备借鉴一下^_^。</p>
<h1><span id="参考资料">参考资料</span></h1><p><a target="_blank" rel="noopener" href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.732.6087&rep=rep1&type=pdf">The tail at scale</a></p>
<p><a target="_blank" rel="noopener" href="https://juejin.im/post/6844903904371539975">经典分布式论文阅读:The Tail at Scale</a></p>
<p><a target="_blank" rel="noopener" href="https://baike.baidu.com/item/%E7%99%BE%E5%88%86%E4%BD%8D%E6%95%B0/10064171?fr=aladdin">百分位数</a></p>
<p><a target="_blank" rel="noopener" href="https://www.cnblogs.com/liuning8023/p/3531900.html">分位数</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2020/09/20/Golang%E6%80%A7%E8%83%BD%E5%88%86%E6%9E%90/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2020/09/20/Golang%E6%80%A7%E8%83%BD%E5%88%86%E6%9E%90/" class="post-title-link" itemprop="url">Golang性能分析</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2020-09-20 16:47:49" itemprop="dateCreated datePublished" datetime="2020-09-20T16:47:49+08:00">2020-09-20</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2020-10-21 23:58:03" itemprop="dateModified" datetime="2020-10-21T23:58:03+08:00">2020-10-21</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96/" itemprop="url" rel="index"><span itemprop="name">性能优化</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1><span id="写在前面">写在前面</span></h1><p>本文章为笔者原创,转载需要表明出处,联系作者:<a href="mailto:luckydreamcatcher@163.com">luckydreamcatcher@163.com</a> | <a href="mailto:the.matrix.vvv@gmail.com">the.matrix.vvv@gmail.com</a></p>
<p>笔者最近在做<strong>golang重构旧php模块</strong>的事情,PHP模块峰值请求约1.5w QPS,是典型的高并发场景,重构过程中,代码中一些容易被开发者**”选择性忽略”<strong>的问题会被指数级放大,比如内存泄露、full GC等等,所以</strong>上线/放量**前必须进行压力测试。</p>
<p>为了更贴近生产环境,与QA同学合作,将重构后的golang模块部署到生产集群,选择一台20标准CPU核的实例,从服务列表中摘除,做压力测试。预期单实例的配置,可以扛住400QPS,在压力测试过程中发现,并发达到400QPS时,实例CPU使用率达到100%,成为性能瓶颈。</p>
<p><strong>节约机器资源作为golang重构旧php的重要收益之一</strong>,为了达成此目标,笔者必须解决golang模块的cpu性能瓶颈,达到预期性能。</p>
<h1><span id="cpu性能分析">CPU性能分析</span></h1><p>CPU性能分析,又称为CPU Profiling,下面介绍了三种笔者常用的性能分析手段:</p>
<ul>
<li>go tool pprof命令行工具</li>
<li>go tool pprof可视化工具</li>
<li>FlameGraph火焰图</li>
</ul>
<p>go tool pprof工具非常强大,性能分析不止这三中方式,可根据业务场景自由选择。实践中,笔者推荐使用<code>go tool pprof</code>与<code>Flame Graph</code>两种方式相结合。</p>
<p>为了在不影响阅读的前提下,保证服务安全,文章的敏感信息,笔者均用”xxx”进行了替换。</p>
<h2><span id="cpu-profiling原理">CPU Profiling原理</span></h2><p><strong>借助工具进行CPU Profiling之前,我们需要了解CPU Profiling的基本原理,这样才可以对数据做出更准确的判断</strong>。然而许许多多Google搜索到的技术博客,几乎千篇一律的都是在介绍golang pprof工具的使用。笔者在阅读golang <code>runtime/pprof</code>源码的基础上,借鉴了Linux <code>perf</code>工具的工作原理,说明一下。</p>
<p>Golang pprof默认会以100Hz(1秒100次)的频率,采集各个goroutine调用栈。假设函数<code>foo</code>在采样时,位于调用栈栈顶,则可以认为当前goroutine在执行<code>foo</code>函数,假如100次采样,<code>foo</code>函数30次位于调用栈栈顶,则可以认为<code>foo</code>函数执行消耗30%。了解了基本原理,下面我们便可以借助工具进行分析。</p>
<h2><span id="golang模块开启profiling">Golang模块开启Profiling</span></h2><p>Golang官方提供强大的<code>runtime/pprof</code>包,用于Golang程序的Profiling。<code>runtime/pprof</code>包功能强大,但对于需长久运行的服务,不够方便。在生产环境中,建议开启<code>http pprof</code>,通过Web服务提供Profiling数据,方便直接使用浏览器查看或其它分析工具拉取数据进行进一步分析。</p>
<figure class="highlight go"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> (</span><br><span class="line"> _ <span class="string">"net/http/pprof"</span></span><br><span class="line">)</span><br></pre></td></tr></table></figure>
<p><code>net/http/pprof</code>包<code>init</code>初始化函数会在默认<code>HTTP Server</code>注册几个路由,将<code>runtime/pprof</code>的输出包装为<code>http</code>服务的响应,逻辑比较简单,可以参考阅读<code>net/http/pprof</code>包源码,此处不做赘述。</p>
<h2><span id="go-tool-pprof命令行工具">go tool pprof命令行工具</span></h2><p>采用Golang自带的pprof命令行工具,进行CPU性能分析:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">go tool pprof http://xxxx.baidu.com:2021/debug/pprof/profile?seconds=120</span><br></pre></td></tr></table></figure>
<p>go tool pprof会将服务端http响应数据写入本地文件(本地文件默认存储<code>/root/pprof</code>目录下,输入go tool pprof <filepath>即可分析本地文件),运行2min之后,自动进入交互式命令行,使用<code>top</code>命令即可查看CPU耗时排行:</filepath></p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line">(pprof) top</span><br><span class="line">Showing nodes accounting <span class="keyword">for</span> 18.09s, 35.20% of 51.39s total</span><br><span class="line">Dropped 856 nodes (cum <= 0.26s)</span><br><span class="line">Showing top 10 nodes out of 246</span><br><span class="line"> flat flat% sum% cum cum%</span><br><span class="line"> 3.20s 6.23% 6.23% 3.58s 6.97% syscall.Syscall</span><br><span class="line"> 2.43s 4.73% 10.96% 10.65s 20.72% runtime.mallocgc</span><br><span class="line"> 2.06s 4.01% 14.96% 2.10s 4.09% encoding/json.stateInString</span><br><span class="line"> 2.02s 3.93% 18.89% 3.97s 7.73% runtime.scanobject</span><br><span class="line"> 1.61s 3.13% 22.03% 4.76s 9.26% encoding/json.checkValid</span><br><span class="line"> 1.43s 2.78% 24.81% 1.43s 2.78% runtime.usleep</span><br><span class="line"> 1.39s 2.70% 27.52% 5.18s 10.08% runtime.mapassign_faststr</span><br><span class="line"> 1.36s 2.65% 30.16% 1.67s 3.25% encoding/json.unquoteBytes</span><br><span class="line"> 1.30s 2.53% 32.69% 1.45s 2.82% net/url.unescape</span><br><span class="line"> 1.29s 2.51% 35.20% 1.86s 3.62% encoding/json.(*decodeState).rescanLiteral</span><br></pre></td></tr></table></figure>
<p>标注:</p>
<ul>
<li>flat: 函数(不包含子函数)执行耗时;</li>
<li>flat%:函数执行耗时占抽样时间百分比;</li>
<li>sum%: 此<strong>行</strong>(包括)之前,flat%之和;</li>
<li>cum: 函数(包含调用的子函数)执行耗时;</li>
<li>cum%: 函数(包含调用的子函数)的执行耗时占抽样时间百分比;</li>
</ul>
<p>top命令默认显示前10条数据,按照flat列降序排列。<strong>虽然定位了CPU耗时较高的函数,但是粒度较细,并不能直观反应产生性能瓶颈的代码</strong>。可以指定<code>-cum</code>参数,显示函数累加执行耗时排行,键入命令<code>top20 -cum</code>:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line">(pprof) top20 -cum</span><br><span class="line">Showing nodes accounting <span class="keyword">for</span> 0.61s, 1.19% of 51.39s total</span><br><span class="line">Dropped 856 nodes (cum <= 0.26s)</span><br><span class="line">Showing top 20 nodes out of 246</span><br><span class="line"> flat flat% sum% cum cum%</span><br><span class="line"> 0.04s 0.078% 0.078% 41.26s 80.29% net/http.(*conn).serve</span><br><span class="line"> 0.04s 0.078% 0.16% 38.25s 74.43% github.com/gin-gonic/gin.(*Context).Next</span><br><span class="line"> 0.03s 0.058% 0.21% 38.24s 74.41% icode.baidu.com/baidu/gdp/gdp.WebHandlerFunc.toGinHandlerFunc.func1</span><br><span class="line"> 0 0% 0.21% 38.17s 74.28% github.com/gin-gonic/gin.(*Engine).handleHTTPRequest</span><br><span class="line"> 0 0% 0.21% 38.06s 74.06% github.com/gin-gonic/gin.(*Engine).ServeHTTP</span><br><span class="line"> 0 0% 0.21% 37.96s 73.87% net/http.serverHandler.ServeHTTP</span><br><span class="line"> 0.01s 0.019% 0.23% 36.86s 71.73% github.com/gin-gonic/gin.RecoveryWithWriter.func1</span><br><span class="line"> 0 0% 0.23% 36.86s 71.73% icode.baidu.com/baidu/gdp/gdp.ginHandler2WebHandler.func1</span><br><span class="line"> 0.07s 0.14% 0.37% 36.80s 71.61% icode.baidu.com/baidu/gdp/gdp.ShoubaiTowerLogware</span><br><span class="line"> 0 0% 0.37% 32.46s 63.16% icode.baidu.com/baidu/gdp/gdp.recovery</span><br><span class="line"> 0.01s 0.019% 0.39% 32.45s 63.14% icode.baidu.com/baidu/xxxxxx/xxxxloc/middlewares.Recovery</span><br><span class="line"> 0.03s 0.058% 0.45% 32.41s 63.07% icode.baidu.com/baidu/xxxxxx/xxxxloc/middlewares.PProfAuth</span><br><span class="line"> 0 0% 0.45% 31.95s 62.17% icode.baidu.com/baidu/xxxxxx/xxxxloc/middlewares.ParseParams</span><br><span class="line"> 0.04s 0.078% 0.53% 30.18s 58.73% icode.baidu.com/baidu/xxxxxx/xxxxloc/controllers.(*WeatherController).GetIndexWeather</span><br><span class="line"> 0.04s 0.078% 0.6% 27.44s 53.40% icode.baidu.com/baidu/xxxxxx/xxxxloc/models/service/page/weather.(*WeatherIndexIphone).GetData</span><br><span class="line"> 0.01s 0.019% 0.62% 25.48s 49.58% icode.baidu.com/baidu/xxxxxx/xxxxloc/models/service/data/weather.(*DataWeatherCommon).GetWeatherData</span><br><span class="line"> 0.02s 0.039% 0.66% 21.03s 40.92% encoding/json.Unmarshal</span><br><span class="line"> 0 0% 0.66% 16.16s 31.45% encoding/json.(*decodeState).unmarshal</span><br><span class="line"> 0.04s 0.078% 0.74% 16.16s 31.45% encoding/json.(*decodeState).value</span><br><span class="line"> 0.23s 0.45% 1.19% 16.15s 31.43% encoding/json.(*decodeState).object</span><br></pre></td></tr></table></figure>
<p>由上可以观察到,系统执行流程大概包括了http框架、controller层、page层、data层,符合调用堆栈。encoding/json.Unmarshal函数累积执行耗时占总样本百分比为<strong>40.92%**,很明显不合理,是系统</strong>性能瓶颈**。go tool pprof命令行工具使用简单方便,无需要借助工具,但是表达不直观,我们可以借助下面提到的两种方式——以图或火焰图的形式。</p>
<h2><span id="go-tool-pprof可视化工具">go tool pprof可视化工具</span></h2><p>go tool pprof命令行支持-png、-svg、-pdf等选项,输出png图片、svg图片、pdf文档。go tool pprof此功能依赖graphviz组件。</p>
<h3><span id="安装graphviz">安装graphviz</span></h3><p>graphviz组件依赖较多,建议解决各个linux发行版本的包管理器进行安装,源码安装参考官方:<a target="_blank" rel="noopener" href="http://www.graphviz.org/download/source/%E3%80%82">http://www.graphviz.org/download/source/。</a></p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># debian</span></span><br><span class="line">apt-get install -y graphviz</span><br><span class="line"><span class="comment"># macos</span></span><br><span class="line">brew install graphviz</span><br></pre></td></tr></table></figure>
<h3><span id="导出图片">导出图片</span></h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">go tool pprof -png http://xxxx.baidu.com:2021/debug/pprof/profile?seconds=120 >> profile.png</span><br></pre></td></tr></table></figure>
<h2><span id="火焰图">火焰图</span></h2><p>火焰图(FlameGraph)能直观的反映出系统的执行情况,是一种性能分析利器。Golang语言pprof工具暂不支持导出火焰图,需要安装第三方工具。笔者推荐使用由Uber开源的go-torch。</p>
<h3><span id="安装flamegraph分析工具">安装FlameGraph分析工具</span></h3><p>安装go-torch:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">go install github.com/uber/go-torch</span><br></pre></td></tr></table></figure>
<p>安装go-torch依赖——FlameGraph脚本:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 下载FlameGraph</span></span><br><span class="line">wget https://github.com/brendangregg/FlameGraph/archive/master.zip</span><br><span class="line"><span class="comment"># 解压</span></span><br><span class="line">unzip master.zip</span><br><span class="line"><span class="comment"># 移动至/opt目录</span></span><br><span class="line">sudo mv FlameGraph-master /opt/FlameGraph</span><br><span class="line"><span class="comment"># 添加至系统Path中</span></span><br><span class="line"><span class="built_in">echo</span> <span class="string">'export PATH=$PATH:/opt/FlameGraph'</span> |sudo tee -a /etc/profile && <span class="built_in">source</span> /etc/profile</span><br></pre></td></tr></table></figure>
<p>go-torch工具安装成功,运行<code>go torch --help</code>查看帮助信息:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><span class="line">Usage:</span><br><span class="line"> go-torch [options] [binary] <profile <span class="built_in">source</span>></span><br><span class="line"></span><br><span class="line">pprof Options:</span><br><span class="line"> -u, --url= Base URL of your Go program (default: http://localhost:8080)</span><br><span class="line"> --suffix= URL path of pprof profile (default: /debug/pprof/profile)</span><br><span class="line"> -b, --binaryinput= File path of previously saved binary profile. (binary profile is anything accepted by</span><br><span class="line"> https://golang.org/cmd/pprof)</span><br><span class="line"> --binaryname= File path of the binary that the binaryinput is <span class="keyword">for</span>, used <span class="keyword">for</span> pprof inputs</span><br><span class="line"> -t, --seconds= Number of seconds to profile <span class="keyword">for</span> (default: 30)</span><br><span class="line"> --pprofArgs= Extra arguments <span class="keyword">for</span> pprof</span><br><span class="line"></span><br><span class="line">Output Options:</span><br><span class="line"> -f, --file= Output file name (must be .svg) (default: torch.svg)</span><br><span class="line"> -p, --<span class="built_in">print</span> Print the generated svg to stdout instead of writing to file</span><br><span class="line"> -r, --raw Print the raw call graph output to stdout instead of creating a flame graph; use with Brendan Gregg<span class="string">'s flame</span></span><br><span class="line"><span class="string"> graph perl script (see https://github.com/brendangregg/FlameGraph)</span></span><br><span class="line"><span class="string"> --title= Graph title to display in the output file (default: Flame Graph)</span></span><br><span class="line"><span class="string"> --width= Generated graph width (default: 1200)</span></span><br><span class="line"><span class="string"> --hash Colors are keyed by function name hash</span></span><br><span class="line"><span class="string"> --colors= set color palette. choices are: hot (default), mem, io, wakeup, chain, java, js, perl, red, green, blue,</span></span><br><span class="line"><span class="string"> aqua, yellow, purple, orange</span></span><br><span class="line"><span class="string"> --cp Use consistent palette (palette.map)</span></span><br><span class="line"><span class="string"> --reverse Generate stack-reversed flame graph</span></span><br><span class="line"><span class="string"> --inverted icicle graph</span></span><br><span class="line"><span class="string"></span></span><br><span class="line"><span class="string">Help Options:</span></span><br><span class="line"><span class="string"> -h, --help Show this help message</span></span><br></pre></td></tr></table></figure>
<p>通常情况下,我们只需要关注-u参数与-f参数即可,运行如下命令进行CPU采样,输出svg格式火焰图:</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">go-torch -u http://xxxx.baidu.com:2021/debug/pprof/profile?seconds=120</span><br></pre></td></tr></table></figure>
<p>go-torch运行输出如下:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">INFO[10:57:29] Run pprof command: go tool pprof -raw -seconds 30 http://xxxx.baidu.com:2021/debug/pprof/profile?seconds=120</span><br><span class="line">INFO[10:58:00] Writing svg to torch.svg</span><br></pre></td></tr></table></figure>
<p>FlameGrpah文件:<a href="torch.svg">torch.svg</a></p>
<h3><span id="flamegraph-火焰图分析">FlameGraph-火焰图分析</span></h3><p>许多人对火焰图的理解有歧义,有些似懂非懂,按照自己的主观意识去解读,导致陷入误区。要使用火焰图进行性能分析,首先需要明确火焰图<strong>x轴</strong>与<strong>y轴</strong>的确切含义。</p>
<p><strong><em>y 轴表示调用栈,每一层都标识一个函数,调用栈越深,火焰就越高,顶部就是当前在执行的函数。</em></strong></p>
<p><strong><em>x 轴表示抽样数,如果一个函数在 x 轴占据的宽度越宽,表示它被抽到的次数多,执行的时间长(x 轴非时间轴,是所有的调用栈合并后,按函数字母顺序排列的)。因此,火焰图顶部,只要有”平顶”(plateaus),则表示该函数可能存在<font color="#ff0000" size="3">性能问题</font>。</em></strong></p>
<p>提示:svg格式,当移动鼠标至其中一栏时会显示”Tips”信息,包含采样数、占采样总数百分比等等信息,有关火焰图更详细的资料参考:<a target="_blank" rel="noopener" href="http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html">cpu flame graph</a>。</p>
<p><img src="/2020/09/20/Golang%E6%80%A7%E8%83%BD%E5%88%86%E6%9E%90/torch.svg" alt="示例火焰图"></p>
<p>上图中,可以很明显观察到<strong>encoding/json.Unmarshal</strong>函数<strong>耗费了40%的CPU时间</strong>,是系统的性能瓶颈。定位了性能瓶颈之后,我们应当思考如何优化了。</p>
<h2><span id="性能优化">性能优化</span></h2><p>json反序列化成为系统性能瓶颈,可以说在情理之内,预期之外。业务角度,我们的Golang模块强依赖的下游服务,返回了一个大JSON(大约几十KB),且字段嵌套层级较深,json反序列化耗时是情理之内的,但是耗费了惊人的40%的CPU时间是预期之外的。</p>
<p>为了解决这个问题,有如下几条思路:</p>
<ul>
<li>对下游返回JSON”瘦身”,未使用的字段不做解析;</li>
<li>使用LRU Cache,在内存中缓存已反序列化之后的Struct;</li>
<li>使用性能更高的开源json序列化方案;</li>
</ul>
<p>如何进行性能调优,解决文章中的Case,笔者将会在新的文章中阐述思路,本文不做过多叙述。</p>
<h1><span id="联系作者">联系作者</span></h1><p>有更好的”性能调优”方式,也欢迎一块儿交流一下(邮箱:<a href="mailto:luckydreamcatcher@163.com">luckydreamcatcher@163.com</a>,微信号:15210466756);</p>
<h1><span id="参考资料">参考资料</span></h1><p><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/51559344">Go Pprof</a></p>
<p><a target="_blank" rel="noopener" href="http://www.graphviz.org/download/source/">graphviz</a></p>
<p><a target="_blank" rel="noopener" href="http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html">cpu flame graph</a></p>
<p><a target="_blank" rel="noopener" href="https://www.ruanyifeng.com/blog/2017/09/flame-graph.html">如何读懂火焰图</a></p>
<p><a target="_blank" rel="noopener" href="https://ichrisking.github.io/2018/03/08/FlameGraph/">FlameGraph安装指南</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2018/02/03/bloom-filter/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2018/02/03/bloom-filter/" class="post-title-link" itemprop="url">bloom filter(布隆过滤器)</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2018-02-03 00:21:14" itemprop="dateCreated datePublished" datetime="2018-02-03T00:21:14+08:00">2018-02-03</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2020-09-24 00:50:05" itemprop="dateModified" datetime="2020-09-24T00:50:05+08:00">2020-09-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/" itemprop="url" rel="index"><span itemprop="name">数据结构</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2><span id="应用场景">应用场景</span></h2><p>在互联网后台的开发工作中,笔者会经常遇到各种各样的**<em>白名单**</em>业务场景,比如以下典型场景:</p>
<ol>
<li>现有1亿个用户<code>user_id</code>,如何快速判断一个<code>user_id</code>是否在该白名单内</li>
<li>网络爬虫解析出一个页面的<code>url</code>清单,如何快速判断该<code>url</code>是否已经被抓取过</li>
<li>现有1亿个<code>user_id</code>,如何快速判断哪些<code>user_id</code>曾重复出现</li>
<li>服务器收到来自某个<code>ip</code>地址的请求,快速判断该<code>ip</code>地址是否在黑名单</li>
<li>……</li>
</ol>
<p>熟悉数据结构的读者,略微思考一下,便知以上若干问题的核心需求是:*<strong>设计一个内存占用少且又高效的查找算法/数据结构。*** 以场景1为例,大多数读者首先想到的数据结构为*</strong>哈希表***,任意元素均可在<code>O(1)</code>时间复杂度内快速完成查找。</p>
<p>假设哈希表的装载因子为0.5(实践中比较常见的取值),粗略计算一下1亿个int类型<code>user_id</code>的内存占用约为<code>745MB</code>,一个白名单要占用如此多的内存空间,这显然是不可接受的。那么我们如何既能达成我们的目的,又占用比较小的内存呢?</p>
<p>一个<code>user_id</code>是否在白名单之内,只可能存在两种取值——是/否,从**<em>香农信息论**</em> 角度来看,使用1个<code>bit</code>即可表示是/否两种取值。一个<code>int</code>类型变量可存储<code>2^32</code>种取值,而当前业务场景下我们仅仅需要<code>0</code>和<code>1</code>两种状态便可(存储4种状态使用2个<code>bit</code>,存储8种状态使用3个<code>bit</code>,以此类推…)。存储1亿个<code>bit</code>占用空间约为<code>11MB</code>,大大减少了内存占用,这便是<code>Bitmap</code>数据结构。</p>
<h2><span id="bitmap">Bitmap</span></h2><p><code>Bitmap</code>是一种紧凑的数据结构。以场景1为例,首先在内存中连续分配1亿个<code>bit</code>,要判断<code>user_id</code>为<code>1000</code>的用户是否在白名单之内,只需获取<code>bit</code>序列的第<code>1000</code>位<code>bit</code>的状态(1:<code>user_id</code>在白名单,0:<code>user_id</code>不在白名单)。如下为<code>c</code>语言版本的示例代码(也可查看笔者的<a target="_blank" rel="noopener" href="https://github.com/keepalive555/study/blob/master/bitmap/bitmap.c"><code>github</code></a>):</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><stdio.h></span></span></span><br><span class="line"></span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> MAXSIZE 1024</span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> SHIFT 5</span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> MASK 0xffffffff</span></span><br><span class="line"><span class="keyword">static</span> <span class="keyword">unsigned</span> <span class="keyword">int</span> bitmap[MAXSIZE / (<span class="keyword">sizeof</span>(<span class="keyword">unsigned</span> <span class="keyword">int</span>) * <span class="number">8</span>) + <span class="number">1</span>];</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">set</span><span class="params">(<span class="keyword">int</span> n)</span> </span>{</span><br><span class="line"> <span class="comment">// 置位操作</span></span><br><span class="line"> bitmap[n >> SHIFT] |= <span class="number">1</span> << (n & MASK);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">void</span> <span class="title">clr</span><span class="params">(<span class="keyword">int</span> n)</span> </span>{</span><br><span class="line"> <span class="comment">// 复位操作</span></span><br><span class="line"> bitmap[n >> SHIFT] &= ~(<span class="number">1</span> << (n & MASK));</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">test</span><span class="params">(<span class="keyword">int</span> n)</span> </span>{</span><br><span class="line"> <span class="comment">// 检测是否置位</span></span><br><span class="line"> <span class="keyword">int</span> i = n >> SHIFT;</span><br><span class="line"> <span class="keyword">if</span>(bitmap[i] & (<span class="number">1</span> << (n & MASK)))</span><br><span class="line"> <span class="keyword">return</span> <span class="number">1</span>;</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">main</span><span class="params">(<span class="keyword">void</span>)</span> </span>{</span><br><span class="line"> <span class="keyword">int</span> n = <span class="number">1023</span>;</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"space: %d\n"</span>, <span class="keyword">sizeof</span>(bitmap) / <span class="keyword">sizeof</span>(<span class="keyword">unsigned</span> <span class="keyword">int</span>));</span><br><span class="line"> <span class="built_in">set</span>(n);</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"has set flag: %d\n"</span>, test(n));</span><br><span class="line"> clr(n);</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"has set flag: %d\n"</span>, test(n));</span><br><span class="line"> <span class="built_in">set</span>(n);</span><br><span class="line"> <span class="built_in">printf</span>(<span class="string">"has set flag: %d\n"</span>, test(n));</span><br><span class="line"> clr(n);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>Bitmap</code>类似于哈希表,哈希规则便是将数字<code>n</code>映射到<code>Bitmap</code>第<code>n</code>个<code>bit</code>上。因此<code>Bitmap</code>在实际应用中存在一处问题——当<code>n</code>取值特别大时,<code>Bitmap</code>占用空间也会比较大。在此业务场景下,<code>Bitmap</code>数据结构是不合理的,所以便衍生出了<code>Bloom Filter</code>。</p>
<h2><span id="bloom-filter">Bloom Filter</span></h2><p><code>Bloom Filter</code>,中文译名布隆过滤器,是1970年由布隆提出来。布隆过滤器可以用于检索一个元素是否在一个集合中。朴素的讲,<code>BloomFilter</code>在<code>Bitmap</code>的基础上,将<code>Hash</code>函数的由一个扩展至多个。判断一个元素是否在一个集合中,仅需判断经过这些<code>Hash</code>函数后的值是否置位。布隆过滤器优点是*<strong>空间复杂度和时间复杂度*** 都优于一般的算法,缺点是*</strong>有一定的误识别率*** ,删除困难。</p>
<p><img src="/2018/02/03/bloom-filter/bloom-filter.png" alt="布隆过滤器"></p>
<h3><span id="算法原理">算法原理</span></h3><p>假设所选<code>Hash</code>函数在散列空间内分布均匀,即散列到每一个位置的概率相等(对于Hash函数的核心诉求)。假设<code>Bit</code>数组的大小为<code>m</code>,<code>k</code>为<code>Hash</code>函数的个数。</p>
<p><code>Bit</code>数组中某一位位置在元素插入时的<code>Hash</code>操作中没有被置位<code>1</code>的概率是:</p>
<p><img src="/2018/02/03/bloom-filter/1.png" alt="1"></p>
<p><code>k</code>个<code>Hash</code>函数散列之后该位置仍未被置位<code>1</code>的概率是:</p>
<p><img src="/2018/02/03/bloom-filter/2.png" alt="2"></p>
<p>连续插入<code>n</code>个元素,该位置仍未被置位<code>1</code>的概率是:</p>
<p><img src="/2018/02/03/bloom-filter/3.png" alt="3"></p>
<p>对立事件,该位为<code>1</code>的概率为:</p>
<p><img src="/2018/02/03/bloom-filter/4.png" alt="4"></p>
<h3><span id="代码实现">代码实现</span></h3><p><code>C</code>语言实现请参考笔者<code>Github</code>:<a target="_blank" rel="noopener" href="https://github.com/keepalive555/DataStructure/blob/master/bitmap/bloomfilter.c">bloomfilter.c</a></p>
<h2><span id="参考资料">参考资料</span></h2><p><a target="_blank" rel="noopener" href="http://matthias.vallentin.net/course-work/cs270-s11.pdf">Bloom Filter Pagers</a></p>
<p><a target="_blank" rel="noopener" href="https://www.cnblogs.com/liyulong1982/p/6013002.html">Bloom Filter</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2018/01/08/Python-list%E5%AE%9E%E7%8E%B0/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2018/01/08/Python-list%E5%AE%9E%E7%8E%B0/" class="post-title-link" itemprop="url">Python list实现</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2018-01-08 21:57:35" itemprop="dateCreated datePublished" datetime="2018-01-08T21:57:35+08:00">2018-01-08</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2020-09-24 00:52:40" itemprop="dateModified" datetime="2020-09-24T00:52:40+08:00">2020-09-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E7%BC%96%E7%A8%8B%E8%AF%AD%E8%A8%80/" itemprop="url" rel="index"><span itemprop="name">编程语言</span></a>
</span>
,
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E7%BC%96%E7%A8%8B%E8%AF%AD%E8%A8%80/%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90/" itemprop="url" rel="index"><span itemprop="name">源码分析</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2><span id="前言">前言</span></h2><p>本文所讲<code>Python</code>实现均为<code>CPython</code>,需读者具备一定的<code>C</code>语言阅读能力。本博文参考了**<em>《Python源码剖析》**</em>与<code>Python2.7</code>源码。<code>PyListObject</code>采用顺序存储(而非链式存储),熟悉<code>数据结构</code>的读者,能很容易明白本博文所讲内容。</p>
<h2><span id="介绍">介绍</span></h2><p><code>PyListObject</code>是<code>Python</code>提供的<code>List</code>容器实现,与<code>C++ STL</code>中的<code>vector</code>实现机制相近。<code>PyListObject</code>是变长对象同时也是可变对象(很显然,不同时刻<code>List</code>中可以存在不同数目的元素)。</p>
<p><code>PyListObject</code>定义如下:</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">typedef</span> <span class="class"><span class="keyword">struct</span> {</span></span><br><span class="line"> PyObject_VAR_HEAD</span><br><span class="line"> PyObject **ob_item;</span><br><span class="line"> <span class="keyword">int</span> allocated;</span><br><span class="line">} PyListObject;</span><br></pre></td></tr></table></figure>
<p><code>PyObject_VAR_HEAD</code>中的<code>ob_size</code>与<code>PyListObject</code>中的<code>allocated</code>字段分别标识了容器的现有*<strong>元素个数(size)**<em>与</em></strong>容器容量(capacity)***。<code>ob_item</code>为指向<code>PyObject *</code>的指针(即<code>PyObject *</code>数组),是<code>PyListObject</code>实现顺序存储的数组。</p>
<h2><span id="实现">实现</span></h2><h3><span id="1-创建对象">1、创建对象</span></h3><p><code>Python</code>提供了唯一创建<code>List</code>的函数——<code>PyList_New</code>。下面是简化的后<code>Python</code>创建<code>PyListObject</code>对象的过程。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#<span class="meta-keyword">define</span> MAXFREELISTS 80</span></span><br><span class="line"><span class="keyword">static</span> PyListObject *free_lists[MAXFREELISTS];</span><br><span class="line"><span class="keyword">static</span> <span class="keyword">int</span> num_free_ists = <span class="number">0</span>;</span><br><span class="line"></span><br><span class="line"><span class="function">PyObject *<span class="title">PyList_New</span><span class="params">(<span class="keyword">int</span> size)</span> </span>{</span><br><span class="line"> PyListObject *op;</span><br><span class="line"> <span class="keyword">size_t</span> nbytes;</span><br><span class="line"> <span class="comment">// 判断int类型是否溢出,若溢出则返回内存分配失败</span></span><br><span class="line"> nbytes = size * <span class="keyword">sizeof</span>(PyObject *);</span><br><span class="line"> <span class="keyword">if</span>(nbytes / <span class="keyword">sizeof</span>(PyObject *) != (<span class="keyword">size_t</span>)size) {</span><br><span class="line"> <span class="keyword">return</span> PyErr_NoMemory();</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">//</span></span><br><span class="line"> <span class="keyword">if</span>(num_free_lists) {</span><br><span class="line"> <span class="comment">// 缓冲池可用,则从缓冲池取一可用List</span></span><br><span class="line"> num_free_lists--;</span><br><span class="line"> op = free_lists[num_free_lists];</span><br><span class="line"> _Py_NewReference((PyObject *)op);</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> <span class="comment">// 缓冲池不可用,直接新建对象并为Python中的自动垃圾收集机制做一些工作</span></span><br><span class="line"> op = PyObject_GC_New(PyListObject, &PyList_Type);</span><br><span class="line"> }</span><br><span class="line"> </span><br><span class="line"> <span class="keyword">if</span>(size <= <span class="number">0</span>) {</span><br><span class="line"> op->ob_item = <span class="literal">NULL</span>;</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> op->ob_item = (PyObject **)PyMem_MALLOC(nbytes);</span><br><span class="line"> <span class="built_in">memset</span>(op->ob_item, <span class="number">0</span>, nbytes);</span><br><span class="line"> }</span><br><span class="line"> op->ob_size = size;</span><br><span class="line"> op->allocated = size;</span><br><span class="line"> <span class="keyword">return</span> (PyObject *)op;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p><code>PyListObject</code>对象分为两部分:①<code>PyListObject</code>对象②<code>PyListObject</code>对象容纳的<code>PyObject</code>元素。</p>
<h3><span id="2-设置元素">2、设置元素</span></h3><p>前面提到<code>PyListObject</code>是顺序存储,可以**<em>随机访问**</em>。通过下标设置<code>List</code>中元素值,是由<code>PyList_SetItem</code>函数实现的。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">PyList_SetItem</span><span class="params">(<span class="keyword">register</span> PyObject *op, <span class="keyword">register</span> <span class="keyword">int</span> i, <span class="keyword">register</span> PyObject *new_item)</span> </span>{</span><br><span class="line"> <span class="comment">// 保存指向旧元素的指针,用于减少引用计数</span></span><br><span class="line"> <span class="keyword">register</span> PyObject *olditem;</span><br><span class="line"> <span class="keyword">register</span> PyObject **p;</span><br><span class="line"> <span class="comment">// 检查索引值得合法性</span></span><br><span class="line"> <span class="keyword">if</span>(i < <span class="number">0</span> || i>= (PyListObject)op->ob_size) {</span><br><span class="line"> <span class="comment">// 报索引错误</span></span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 设置元素</span></span><br><span class="line"> p = ((PyListObject*)op)->ob_item + i;</span><br><span class="line"> olditem = *p;</span><br><span class="line"> Py_XDECREF(olditem);</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<h3><span id="3-插入元素">3、插入元素</span></h3><p>了解<code>顺序存储</code>的读者,很容易想到新元素的插入会导致元素的移动。<code>PyListObject</code>的实现也不例外,而这其中又牵扯了<code>PyListObject.ob_item</code>的*<strong>扩容**<em>与</em></strong>缩容***(参考<code>Redis</code>或者其它若干软件的实现,都会有类似机制)。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">PyList_Insert</span><span class="params">(PyObject *op, Py_ssize_t where, PyObject *new_item)</span> </span>{</span><br><span class="line"> <span class="keyword">return</span> insl((PyListObject *)op, where, newitem);</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">insl</span><span class="params">(PyListObject *self, Py_ssize_t where, PyObject *v)</span> </span>{</span><br><span class="line"> Py_ssize_t i, n = self->ob_size;</span><br><span class="line"> PyObject **items;</span><br><span class="line"> <span class="comment">// 调整列表容量</span></span><br><span class="line"> <span class="keyword">if</span>(list_resize(self, n+<span class="number">1</span>) == <span class="number">-1</span>)</span><br><span class="line"> <span class="keyword">return</span> <span class="number">-1</span>;</span><br><span class="line"> <span class="comment">// 确定插入点</span></span><br><span class="line"> <span class="keyword">if</span>(where < <span class="number">0</span>) {</span><br><span class="line"> <span class="comment">// 负数索引</span></span><br><span class="line"> where += n;</span><br><span class="line"> <span class="keyword">if</span>(where < <span class="number">0</span>)</span><br><span class="line"> where = <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span>(where > n)</span><br><span class="line"> where = n;</span><br><span class="line"> <span class="comment">// 插入元素</span></span><br><span class="line"> items = self->ob_item;</span><br><span class="line"> <span class="keyword">for</span>(i = n; --i >= where; )</span><br><span class="line"> <span class="comment">// 从后往前将元素后移一个单位,空出新元素存储单元</span></span><br><span class="line"> item[i+<span class="number">1</span>] = item[i]</span><br><span class="line"> <span class="comment">// 使用宏Py_INCREF增加元素v的引用计数</span></span><br><span class="line"> Py_INCREF(v);</span><br><span class="line"> item[where] = v;</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>其中函数<code>list_resize</code>为<code>PyListObject</code>对象*<strong>扩容**<em>与</em></strong>缩容***的关键。<code>list_resize</code>函数的实现如下:</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">list_resize</span><span class="params">(PyObjectList *self, <span class="keyword">int</span> newsize)</span> </span>{</span><br><span class="line"> PyObject **items;</span><br><span class="line"> <span class="keyword">size_t</span> new_allocated;</span><br><span class="line"> <span class="keyword">int</span> allocated = self->allocated;</span><br><span class="line"> <span class="comment">// 不需要申请内存</span></span><br><span class="line"> <span class="keyword">if</span>(allocated >= newsize && newsize >= (allocated >> <span class="number">1</span>)) {</span><br><span class="line"> self->ob_size = newsize;</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 计算重新申请内存的大小</span></span><br><span class="line"> new_allocated = (newsize >> <span class="number">3</span>) + (newsize < <span class="number">9</span> ? <span class="number">3</span> : <span class="number">6</span>) + newsize;</span><br><span class="line"> <span class="keyword">if</span>(newsize == <span class="number">0</span>)</span><br><span class="line"> new_allocated = <span class="number">0</span>;</span><br><span class="line"> <span class="comment">// 扩展列表</span></span><br><span class="line"> items = self->ob_items;</span><br><span class="line"> <span class="comment">// 最终调用c语言的realloc</span></span><br><span class="line"> PyMem_RESIZE(item, PyObject *, new_allocated);</span><br><span class="line"> self->ob_itme = items;</span><br><span class="line"> self->ob_size = newsize;</span><br><span class="line"> self->allocated = new_allocated;</span><br><span class="line"> <span class="keyword">return</span> <span class="number">0</span>;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>当<code>List</code>新的元素个数<code>newsize</code>,满足条件:<code>allocated/2 <= newsize <= allocated</code>时,不需要进行<code>realloc</code>。当<code>newsize >= allocated</code>时,<code>PyObjectList</code>会进行*<strong>扩容**<em>操作,当<code>newsize < allocated/2</code>时<code>PyObjectList</code>会进行</em></strong>缩容***操作。</p>
<h2><span id="对象池">对象池</span></h2><p><code>CPython</code>为了解决频繁创建对象带来的性能问题(大多数对性能要求较高的<code>C</code>程序均采用类似机制),采用了大量的<code>对象池</code>技术——<code>PyListObject</code>的实现也不例外。如果读者对此类技术不熟悉,请参阅**<em>对象池**</em>设计模式。</p>
<p>在如上<code>PyList_New</code>函数的实现代码中,<code>free_lists</code>指针数组便是用于<code>PyListObject</code>创建的对象池。我们可以看到如果存在可用的<code>PyListObject</code>,<code>Python</code>便会从<code>对象池</code>中取出并返回一个<code>PyListObject</code>对象。那么<code>PyListObject</code>对象是**<em>何时、如何**</em>归还给对象池的呢?答案就在销毁<code>PyListObject</code>的<code>list_dealloc</code>函数里。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">void</span> <span class="title">list_dealloc</span><span class="params">(PyListObject *op)</span> </span>{</span><br><span class="line"> <span class="keyword">int</span> i;</span><br><span class="line"> <span class="keyword">if</span>(op->ob_item != <span class="literal">NULL</span>) {</span><br><span class="line"> i = op->ob_size;</span><br><span class="line"> <span class="keyword">while</span>(--i >= <span class="number">0</span>) {</span><br><span class="line"> Py_XDECREF(op->ob_item[i]);</span><br><span class="line"> }</span><br><span class="line"> PyMem_FREE(op->ob_item);</span><br><span class="line"> }</span><br><span class="line"> <span class="comment">// 释放PyListObject自身</span></span><br><span class="line"> <span class="keyword">if</span>(num_free_lists < MAXFREELISTS && PyList_CheckExact(op))</span><br><span class="line"> free_lists[num_free_lists++] = op;</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> op->ob_type->tp_free((PyObject *)op);</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2017/12/29/SkipList/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2017/12/29/SkipList/" class="post-title-link" itemprop="url">SkipList研究</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2017-12-29 00:37:10" itemprop="dateCreated datePublished" datetime="2017-12-29T00:37:10+08:00">2017-12-29</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2020-09-24 00:51:16" itemprop="dateModified" datetime="2020-09-24T00:51:16+08:00">2020-09-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/" itemprop="url" rel="index"><span itemprop="name">数据结构</span></a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2><span id="吐槽">吐槽</span></h2><p>作为一名学渣,每次回头去翻看一下大学课程的基础知识,总会有不同的感受。笔者也总想着把自己工作中领悟的做归纳。关于查找算法,思想大概可以归类为三类(大神请绕路):</p>
<ul>
<li>顺序查找</li>
<li>二分查找(插入查找、斐波拉切查找…)</li>
<li>哈希查找</li>
</ul>
<p>顺序查找是我们常用的遍历。在对性能要求比较高的业务场景下,我们便需要考虑其他更好的实现方式了(例如:为了避免全表扫描,数据库通过<code>B+ Tree</code>索引提高查找效率)。哈希查找,时间复杂度为<code>O(1)</code>,是一种常见且应用广泛的查找算法。本文将在剩余篇幅对二分查找法进行吐槽。</p>
<h2><span id="思考">思考</span></h2><p>咦?今天我们讨论的不是<code>SkipList</code>吗,为什么会谈到二分查找法,接下来笔者将阐述一下原因。在实际工程应用中,算法与数据结构是相辅相成的,相互依存,相互影响的, 没有<code>数据结构</code>支撑的算法只能是空中阁楼。接下来,我们思考尝试为二分查找(或类似思想)寻找一个适合的**<em>数据结构**</em>。</p>
<p>通常会从<code>CRUD</code>(即增、删、改、查)四个角度,结合具体应用场景去衡量一个数据结构的适用性。我们知道数据的存储方式分为两种:①顺序存储②链式存储。<strong>顺序存储</strong>中,有序列表的元素在内存中紧紧相连,可以**<em>随机访问**</em>(直接用下标访问,时间复杂度<code>O(1)</code>),能用二分查找法快速定位节点。但是顺序存储对<code>增、删</code>操作的处理比较费力(当删除列表中一个元素时,列表应当将该元素后面的元素前移,填补空的节点,同样增加元素时亦是如此)。</p>
<p>顺序存储不适用于<code>增 、删</code>操作频繁的应用场景,那么我们考虑一下*<strong>链式存储**<em>。</em></strong>链表*<strong>能很好的处理<code>增、删</code>频繁的场景。但是链表一般**<em>顺序访问</em></strong>(即读取第一个元素后才可以读取第二个元素,以此类推),显然传统的链表数据结构无法应用二分的思想进行快速查找。</p>
<p>聪明的人们结合<code>二叉树</code>,发明了**<em>二叉查找树**</em>———既可以二分查找,又能够快速<code>添加、删除</code>元素的数据结构。这正是我们期望的能够应用二分查找的完美数据结构吗?很遗憾,并不是。二叉查找树在最坏情况下可能变成一个链表。于是,在二分查找树的基础上,就出现了<code>AVL</code>平衡树。<code>AVL</code>树在<code>增、删</code>节点时,为了保持树的平衡,会进行左旋,右旋操作,增加了<code>增、删</code>操作的复杂度。于是乎根据人们在发明了<code>B-Tree</code>,<code>B+ Tree</code>,<code>红黑树</code>等。但是<code>AVL</code>树实现起来比较复杂,平衡操作较难理解。</p>
<p>所以便有了<code>SkipList</code>。</p>
<h2><span id="实现">实现</span></h2><p>百度搜索网上一些<code>SkipList</code>的实现,代码多多少少存在一些瑕疵。笔者根据自己对<code>SkipList</code>的理解,结合网上的一些实现,整理出了一份<code>C</code>语言版本的<code>SkipList</code>实现。读者可以参阅笔者的<code>GitHub</code>,源文件:<a target="_blank" rel="noopener" href="https://github.com/keepalive555/study/blob/master/skiplist/skiplist.c">https://github.com/keepalive555/study/blob/master/skiplist/skiplist.c</a>。</p>
<p>其中<code>SkipList</code>新建<code>Node</code>节点,随机获取节点<code>level</code>值的<code>random_level</code>函数(源码如下所示),是笔者摘抄自<code>Redis</code>源码。*<strong>该函数是保证<code>SkipList</code>的<code>CRUD</code>操作时间复杂度为**<em>O(logN)</em></strong>的核心所在***。</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#<span class="meta-keyword">define</span> MAX_LEVEL 32</span></span><br><span class="line"><span class="meta">#<span class="meta-keyword">define</span> P 0.25</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">random_level</span><span class="params">(<span class="keyword">void</span>)</span> </span>{ </span><br><span class="line"> <span class="keyword">int</span> level = <span class="number">1</span>; </span><br><span class="line"> <span class="keyword">while</span> ((random() & <span class="number">0xFFFF</span>) < (P * <span class="number">0xFFFF</span>)) </span><br><span class="line"> level += <span class="number">1</span>; </span><br><span class="line"> <span class="keyword">return</span> (level < MAX_LEVEL) ? level : MAX_LEVEL; </span><br><span class="line">}</span><br></pre></td></tr></table></figure>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2017/12/23/raft%E5%8D%8F%E8%AE%AE/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2017/12/23/raft%E5%8D%8F%E8%AE%AE/" class="post-title-link" itemprop="url">Raft协议</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2017-12-23 21:14:30" itemprop="dateCreated datePublished" datetime="2017-12-23T21:14:30+08:00">2017-12-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2017-12-30 00:49:33" itemprop="dateModified" datetime="2017-12-30T00:49:33+08:00">2017-12-30</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2><span id="前言">前言</span></h2><p>分布式,很多初学者对这个词的第一印象——高大上技术范儿。抛开技术细节不谈,纵观后台技术的发展,存在着普遍适用的规律,一项新技术的诞生,总是解决一些现有架构无法解决的问题。如果读者凭空去学习分布式,便容易坠入云里雾里。本文作为笔者自己学习的一个梳理,以实际问题出发阐述了笔者对<code>Raft</code>协议的理解。本文并不对<code>Raft</code>协议的实现机制做详细的描述,只是从一个新手解决问题的角度去阐述<code>Raft</code>协议做了些什么,不正确的地方请读者指正(邮箱:<a href="mailto:[email protected]">[email protected]</a>)。</p>
<h2><span id="思考">思考</span></h2><p>以经典单数据库实例架构(这也是很多企业级应用的典型架构)为例,所有的业务数据均存储于单机数据库,当数据库实例<code>Crash</code>了以后,业务便受到影响,在大多数情况下,这种<code>Crash</code>对企业业务的影响是可控范的。然而在互联网应用中,哪怕是一分钟的<code>Crash</code>对企业来说也是致命的,比如前段时间,美团的外卖系统出现崩溃,整个服务停摆几个小时,造成大量用户流失到饿了么平台。 </p>
<p>笔者尝试根据自己的经验去解决该问题,为了让单机数据库实例在<code>Crash</code>了以后,整个系统仍然保持可用,我们很容易想到的一个策略——冗余(比如你在单位请假了需要有人代替你继续工作而不影响业务)。我们增加了一台数据库实例<code>B</code>(原来的数据库实例用<code>A</code>表示),在实例<code>A</code>挂掉了之后,我们期望<code>B</code>可以代替<code>A</code>继续提供服务,*<strong>所以<code>B</code>与<code>A</code>必须具备一样的数据**<em>,在分布式里面这个称作</em></strong>一致性*<strong>。<code>Raft</code>协议为**<em>分布式一致性协议</em></strong>的一种实现,主要目标就是解决上述这类问题。</p>
<p>脱离现有的<code>MySQL</code>,<code>Redis</code>,<code>Kafka</code>等高可用方案(因为这些系统为了性能而做出一些折中),我们根据自己的诉求,去设计一个高可用的存储系统,需要注意哪些问题呢?假设我们的存储系统有<code>A</code>,<code>B</code>,<code>C</code>等3个节点用来保持高可用,那我们该怎么保持<code>A</code>,<code>B</code>,<code>C</code>3个节点内数据的一致性呢?</p>
<ul>
<li>一致性由客户端保证还是服务端保证</li>
<li>如何保证<code>A,B,C</code>或更多节点的数据一致性</li>
</ul>
<p>首先分析第一个问题,假设一致性工作是由客户端保证的(客户端向<code>A</code>写入数据的同时向<code>B</code>和<code>C</code>写入数据,为保证<code>A,B,C</code>的一致性,需<code>A,B,C</code>3个节点全部写入成功,客户端回才判定写入成功),我们可能会遇到如下情况:</p>
<ul>
<li><code>B</code>下线了一段时间又重新上线,因为客户端未保存<code>B</code>处于下线状态这段时间的数据,所以<code>B</code>中就会缺失这部分数据,因而<code>B</code>中数据会与<code>A</code>与<code>C</code>中数据不一致。</li>
<li>客户端向<code>A</code>与<code>C</code>中写入数据成功,但向<code>B</code>中写入数据失败,这次写入应当被认定为失败(因为<code>A</code>,<code>B</code>,<code>C</code>中数据不一致,也无法通过其他途径达到一致),我们期望整个系统可以表现的犹如一个**<em>事务**</em>,要么全部成功,要么全部失败回滚修改,客户端无法提供这种机制。</li>
</ul>
<p>综上,**<em>由客户端保证数据的一致性是不可取的**</em>。 </p>
<p>我们将一致性保证工作放在服务端实现,那么我们如何保证<code>A,B,C</code>三节点数据的一致性呢?首先我们思考一个问题,**<em>我们无法预知<code>A,B,C</code>三个节点中哪个节点会意外挂掉,所以客户端不应该至同单一节点建立联系**</em>,也就是说——<code>A,B,C</code>3个节点对外应当表现为一个整体,也就是集群<code>Cluster</code>。那么客户端该如何向<code>A,B,C</code>组成的集群写入数据?以下是笔者想到的实现方式:</p>
<ul>
<li>所有客户端均向<code>A,B,C</code>中某一节点(比如<code>A</code>)写入数据,由该节点将数据拷贝至其它节点以达到一致性。</li>
<li>向建立连接的节点写入数据,比如<code>客户端1</code>同<code>A</code>建立连接,<code>客户端1</code>向<code>A</code>写入数据,<code>客户端2</code>同<code>B</code>建立连接,<code>客户端2</code>向<code>B</code>写入数据,以此类推。</li>
</ul>
<p>读者是否觉得以上两种实现方式似曾相识——这和*<strong>并发编程**<em>下的并发更改共享变量问题相似,由经验我们可知,我们最好是将对共享的操作</em></strong>串行,有序的***执行。同样,如果多个客户端通过多个节点向集群写入数据,为了达到每个节点都有一份完整数据的目的,多个节点间会进行通讯,数据合并,而这其中又牵扯了数据的顺序等许多问题,工程实现起来比较复杂。<br>当然不是说不可以,笔者没见过这么做的~ ~)</p>
<p>方式一为目前流行的一致性解决思路,<code>Raft</code>协议采用了该思路,<code>Raft</code>协议解决了方式一面临的两大问题:</p>
<ul>
<li>集群启动(或者写入节点下线)时,如何选举出一个节点作为写入节点</li>
<li>写入节点如何与其它节点通讯,复制数据,保持数据在各节点的一致性</li>
</ul>
<p>以上两大问题便是<code>Raft</code>协议的两大功能:</p>
<ul>
<li><code>Leader Election</code></li>
<li><code>Log Replication</code></li>
</ul>
<p>分布式中任何环节都是不可靠的,实际问题比本人论述的复杂的多,但明确了上述问题,再去研究<code>Raft Paper</code>时,读者便可以快速掌握<code>Raft</code>协议。</p>
<p>建议大家观看<code>Raft</code>协议动画,简单明了生动:<a target="_blank" rel="noopener" href="http://thesecretlivesofdata.com/raft/">http://thesecretlivesofdata.com/raft/</a></p>
<h2><span id="参考">参考</span></h2><p>[1] <a target="_blank" rel="noopener" href="https://raft.github.io/raft.pdf">Raft Pager</a></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://keepalive555.github.io/2017/12/23/%E7%BA%BF%E6%AE%B5%E6%A0%91%E5%BA%94%E7%94%A8/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.jpg">
<meta itemprop="name" content="Lan Wei, Wang">
<meta itemprop="description" content="技术博客">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="枫叶居">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2017/12/23/%E7%BA%BF%E6%AE%B5%E6%A0%91%E5%BA%94%E7%94%A8/" class="post-title-link" itemprop="url">线段树应用(编辑中)</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2017-12-23 02:25:15" itemprop="dateCreated datePublished" datetime="2017-12-23T02:25:15+08:00">2017-12-23</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2017-12-25 22:08:09" itemprop="dateModified" datetime="2017-12-25T22:08:09+08:00">2017-12-25</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">