-
Notifications
You must be signed in to change notification settings - Fork 31
/
index.html
8946 lines (5148 loc) · 270 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Arabic &amp; Persian Layout Requirements</title>
<script src="https://www.w3.org/Tools/respec/respec-w3c" async class="remove"></script>
<script class="remove">
// Configuration object for the ReSpec script that this page loads.
// It carries document metadata (status, short name, editors, group),
// two local bibliography entries, and one post-processing hook.
var respecConfig = {
  // specification status (e.g. WD, LCWD, WG-NOTE, etc.). If in doubt use ED.
  specStatus: "ED",
  //publishDate: "2015-07-21",
  //previousPublishDate: "2018-02-22",
  //previousMaturity: "FPWD",
  noRecTrack: true,
  shortName: "alreq",
  copyrightStart: "2015",
  edDraftURI: "https://w3c.github.io/alreq/",
  // if this is a LCWD, uncomment and set the end of its review period
  // lcEnd: "2009-08-05",
  // editors, add as many as you like
  // only "name" is required
  editors: [
    // NOTE(review): the mailto value looks like an e-mail-obfuscation
    // placeholder rather than a real address — confirm against the repository.
    { name: "Richard Ishida", mailto: "[email protected]", company: "W3C", w3cid: 3439 },
  ],
  group: "i18n",
  github: "w3c/alreq",
  // Bibliography entries referenced from the text as [[UBA-BASICS]] and
  // [[W3-ARAB-MATH]].
  localBiblio: {
    "UBA-BASICS": {
      "authors": [
        "Richard Ishida",
      ],
      "href": "https://www.w3.org/International/articles/inline-bidi-markup/uba-basics",
      "publisher": "World Wide Web Consortium",
      "title": "Unicode Bidirectional Algorithm basics",
      "id": "UBA-BASICS",
    },
    "W3-ARAB-MATH": {
      "authors": [
        "Azzeddine Lazrek",
        "Mustapha Eddahibi",
        "Khalid Sami",
        "Bruce R. Miller"
      ],
      "href": "https://www.w3.org/TR/arabic-math/",
      "publisher": "World Wide Web Consortium",
      "title": "Arabic mathematical notation",
      "id": "W3-ARAB-MATH"
    }
  },
  postProcess: [
    // Inlines every stylesheet link marked with data-import: the CSS text is
    // fetched and the link element is replaced by a style element holding it.
    // All marked links are processed concurrently.
    //
    // A link whose stylesheet cannot be fetched (network error or a non-2xx
    // response) is left in place, so an HTTP error page is never inlined as
    // CSS and one failing sheet does not abort the import of the others.
    async function importStyleSheet() {
      const links = document.querySelectorAll(`link[rel='stylesheet'][data-import]`)
      await Promise.all(
        [...links].map(async link => {
          try {
            const response = await fetch(link.href)
            if (!response.ok) {
              // fetch() only rejects on network failure; HTTP errors must be
              // detected explicitly.
              throw new Error(`HTTP ${response.status}`)
            }
            const style = document.createElement("style")
            style.textContent = await response.text()
            link.replaceWith(style)
          } catch (err) {
            console.warn(`importStyleSheet: could not inline ${link.href}; keeping the link element.`, err)
          }
        })
      )
    }
  ],
};
</script>
<link rel="stylesheet" data-import href="https://w3c.github.io/i18n-drafts/style/respec_2022.css">
<link rel="stylesheet" href="local.css">
</head>
<body>
<section id="abstract">
<p>This document describes requirements for the layout and presentation of text in languages that use the Arabic script when they are used by Web standards and technologies, such as HTML, CSS, Mobile Web, Digital Publications, and Unicode.</p>
</section>
<section id="sotd">
<p>This document describes the basic requirements for Arabic script layout and text support on the Web and in eBooks. These requirements provide information for Web technologies such as CSS, HTML and digital publications about how to support users of Arabic scripts. Currently the document focuses on Standard Arabic and Persian.</p>
<p>The editor's draft of this document was developed by the <a href="https://w3c.github.io/alreq/homepage/">Arabic Layout Task Force</a>, part of the W3C <a href="https://www.w3.org/International/ig/">Internationalization Interest Group</a>. It is published by the <a href="https://www.w3.org/International/core/">Internationalization Working Group</a>. The end target for this document is a Working Group Note.</p>
<p data-lang="en">To make it easier to track comments, please raise a separate issue for each comment, and at the start of the issue add a URL pointing to the section you are commenting on.</p>
</section>
<section id="h_introduction">
<h2>Introduction</h2>
<section id="h_acknowledgements">
<h3>Contributors</h3>
<p>The information in this document was created by the following participants in the W3C's <a href="https://w3c.github.io/alreq/home">Arabic Script Language Enablement</a> community: Behnam Esfahbod (Quora/Virgule Typeworks), Mostafa Hajizadeh, Najib Tounsi (Ecole Mohammadia d'Ingénieurs), Richard Ishida (W3C), Shervin Afshar (Netflix), and Titus Nemeth.</p>
<p>Additional information and clarifications were provided by Khaled Hosny and Azzeddine Lazrek.</p>
<p data-lang="en">See also the <a href="https://github.com/w3c/alreq/graphs/contributors">GitHub contributors list</a> for the Arabic Script Enablement project, and the <a href="https://github.com/w3c/alreq/issues?q=is%3Aissue">discussions</a>.</p>
</section>
<section id="h_about_this_document">
<h3>About this document</h3>
<p>The aim of this document is to describe the basic requirements for Arabic script layout and text support on the Web and in eBooks. These requirements provide information for Web technologies such as CSS, HTML and digital publications, and for application developers, about how to support users of Arabic scripts. The document focuses on Standard Arabic and Persian.</p>
<p>The document focuses on typographic layout issues. For a deeper understanding of the Arabic script itself and how it works see <cite>Modern Standard Arabic Orthography Notes</cite>, which includes topics such as: <a href="https://r12a.github.io/scripts/arab/arb.html#phonology">Phonology</a>, <a href="https://r12a.github.io/scripts/arab/arb.html#vowels">Vowels</a>, <a href="https://r12a.github.io/scripts/arab/arb.html#consonants">Consonants</a>, <!--a href="https://r12a.github.io/scripts/arab/arb.html#encoding">Encoding choices</a>,--> and <a href="https://r12a.github.io/scripts/arab/arb.html#numbers">Numbers</a>.</p>
<p>This document should contain no reference to a particular technology. For example, it should not say "CSS does/doesn't do such and such", and it should not describe how a technology, such as CSS, should implement the requirements. It is technology agnostic, so that it will be evergreen, and it simply describes how the script works. The gap analysis document is the appropriate place for all kinds of technology-specific information.</p>
</section>
<section id="h_gap_analysis">
<h3>Gap analysis</h3>
<p>This document should be used alongside a separate document, <a href="https://www.w3.org/TR/alreq-gap/"><cite>Arabic Script Gap Analysis</cite></a>, which describes gaps in support for various languages written using the Arabic script on the Web, and prioritises and describes the impact of those gaps on the user.</p>
<p>Gap reports are brought to the attention of spec and browser implementers, and are tracked via the <a href="https://github.com/orgs/w3c/projects/95" target="_blank">Gap Analysis Pipeline</a>. (<a href="https://github.com/orgs/w3c/projects/95/views/1?filterQuery=label%3A%22doc%3Aarab_ks%22" target="_blank">Filter it for Kashmiri</a>)</p>
</section>
<section id="h_info_requests">
<h3>Other related resources</h3>
<p>To complement any content authored specifically for this document, a separate document, Arabic Script Layout Requirements, points to information, tests, GitHub discussions, etc., for a wide range of languages that use a form of the Arabic script.</p>
<p>The <a href="https://w3c.github.io/typography/"><cite>Language enablement index</cite></a> points to this document and others, and provides a central location for developers and implementers to find information related to various scripts.</p>
<p>The W3C also has a repository with discussion threads related to the Arabic script, including requests from developers to the user community for information about how scripts/languages work, and a notification system that tracks issues in W3C working groups related to Arabic scripts. See a list of <a target="_blank" href="https://github.com/w3c/alreq/issues?q=is%3Aissue+is%3Aopen+label%3As%3Aarab+label%3Aquestion+">unresolved questions</a> for Arabic script experts. See also the <a href="https://w3c.github.io/alreq/home">repository home page</a>.</p>
</section>
<section id="h_languages">
<h3>Language scope</h3>
<p>This document is focused on two languages: Standard Arabic and Persian.</p>
<section id="h_standard_arabic_language">
<h4>Standard Arabic language</h4>
<p><dfn>Standard Arabic</dfn>—a.k.a. Modern Standard Arabic or Literary Arabic—is the standardized and literary variety of Arabic used in writing and in most formal speech in countries of Northern Africa and West Asia. Regional and classical dialects of Arabic may differ in layout and text details and are <em>not</em> covered by this document.</p>
<p>However, there are some major differences in common practices between the <dfn>Western Arab regions</dfn>—that is, North-West Africa—and <dfn>Eastern Arab regions</dfn>—that is, North-East Africa and West Asia. For example, the numeral digits used in the two regions and their formatting are vastly different. That said, there is no clear dividing line between the Eastern and Western Arab regions.</p>
</section>
<section id="h_persian_language">
<h4>Persian language</h4>
<p><dfn>Persian</dfn>—a.k.a. Modern Persian—is the standardized and literary variety of the official languages used in Iran and Afghanistan. The dialect of Persian in Iran is also called Western Persian, and is locally known as <span class="qterm">Farsi</span>. The dialect of Persian in Afghanistan is also known as Eastern Persian, and is locally known as Dari.</p>
<p><dfn>Tajik</dfn>—a.k.a. Tajiki or Tajiki Persian—is the Persian language as used in Tajikistan. It is written in the Cyrillic script and is therefore <em>not</em> covered by this document.</p>
</section>
</section>
</section>
<section>
<h2 id="h_text_direction">Text direction</h2>
<section id="h_writing_mode">
<h3>Writing mode</h3>
<p>Arabic script is written from right to left. Numbers, even Arabic numbers, are written from left to right, as is text in a script that is normally left-to-right.</p>
<p>When the main script is Arabic, the layout and structure of pages and documents are also set from right to left.</p>
<section id="h_vertical_text">
<h4>Vertical text</h4>
<p>In situations where short runs of text run vertically, for example on book spines or in table headers, Arabic text is rotated to run along the line. It may be rotated so that the tops of the letters face to the left (read the text from top to bottom) or to the right (read the text from bottom to top).</p>
<figure id="fig_vertical_spine">
<img style="width: 89px;" src="images/vertical-rotated-left.png" alt=
"Vertical Arabic top down"> <img style="width: 90px;" src="images/vertical-rotated-right.png" alt="Vertical Arabic bottom up">
<figcaption>Vertical Arabic, top-down and bottom-up flow.</figcaption>
</figure>
<p>The flow of text, top-down vs. bottom-up, may depend on regions or authors. The left
case in [[[#fig_vertical_spine]]] is a typically francophone style for book
spines, whereas the right case is an anglophone style.</p>
<section id="h_vertical_embedding">
<h5>Arabic embedded in vertically orientated text</h5>
<p>When Arabic is embedded in body text that is set vertically, such as CJK or Mongolian text, it is also normally rotated so as to run along the line. Typically, the Arabic text will be read from bottom to top of the line.</p>
<p>When the Arabic text spans more than one line, the text is wrapped in the same way as it would be in horizontal text, i.e. the first part of the Arabic text is kept on the
first line, and subsequent parts of the Arabic text appear on subsequent lines.
Therefore, in the case of Mongolian, where lines are read left-to-right, the Arabic text lines are also read left-to-right, whereas in Chinese or Japanese, where vertical lines are normally read right-to-left, the Arabic text lines are also read right-to-left.</p>
<figure id="fig_arabic_in_chinese">
<img style="width: 64px; height: 220px;" src="images/embedded-ar.png" alt="Arabic text embedded in vertical Chinese text">
<figcaption>Arabic text embedded in vertical Chinese.</figcaption>
</figure>
</section>
<section id="h_vertical_upright">
<h5>Upright vertical Arabic text</h5>
<p>There are attested cases of Arabic text arranged vertically with the letters upright, for example in signboards for cinemas or theatres. It is not clear at this point whether this is a standard approach for Arabic text, or just an unusual layout that mimics Western typographic approaches.</p>
<figure id="fig_upright_vertical">
<img src="images/odeonMovie.png" alt="Upright vertical Arabic text on the front of a cinema">
<figcaption>Upright Arabic text on the front of a cinema (see arrow).</figcaption>
</figure>
<p>The following should be noted in <a href="#fig_upright_vertical"></a>.</p>
<ol>
<li>Letters flow from top to bottom (as with Latin script).</li>
<li>The isolated forms of letters seem more appropriate here than the joined forms.</li>
<li>Diacritic marks, if any, must be applied to letters and not appear on separate
lines.</li>
</ol>
</section>
</section>
</section>
<section id="h_bidi_text">
<h3>Bidirectional text</h3>
<section id="h_bidirectional_text">
<h4>The Unicode Bidirectional Algorithm</h4>
<p>The <dfn data-lt="bidirectional algorithm|bidi algorithm"><a href=
"https://www.unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a></dfn> (or
<span class="qterm">bidi algorithm</span>, for short) [[UAX9]] details an algorithm for rendering right-to-left text and covers a myriad of situations, mixing different kinds of characters. A simpler explanation of the basics of the algorithm exists in the W3C article <a href="https://www.w3.org/International/articles/inline-bidi-markup/uba-basics">Unicode Bidirectional Algorithm basics</a>. [[UBA-BASICS]] You can refer to these documents for more information about Unicode’s bidirectional algorithm.</p>
<p>A brief overview of the <a>bidirectional algorithm</a> follows, because the direction is an essential part of how Arabic script is used.</p>
<p>The characters of a text are digitally stored and transferred in the same order that they are typed by a user. This is the order in which the text is read and pronounced by people and held in memory by software applications, as shown in <a href="#fig_order_in_memory"></a> for a sample text.</p>
<figure id="fig_order_in_memory">
<img src="images/order-in-memory.svg" alt="The order of characters in memory" style="width:80%">
<figcaption>The order of characters in memory.</figcaption>
</figure>
<p>But the order used when displaying text is different. The purpose of the bidi algorithm is to find display positions for the characters of a text. These positions are solely used for displaying texts. <a href="#fig_order_when_displayed"></a> shows the same sample text when prepared for display with the bidi algorithm.</p>
<figure id="fig_order_when_displayed">
<img src="images/order-when-displayed.svg" alt="The order of characters when displayed" style="width:80%">
<figcaption>The order of characters when displayed.</figcaption>
</figure>
<p>An initial step of the process involves determining each paragraph’s <span class=
"qterm">base direction</span>: whether the paragraph is left-to-right or right-to-left. The base direction is either explicitly set by the author, inherited from the page, or (typically for user-generated content) detected based on the content of the paragraph. The base direction has two important uses later in the process.</p>
<p>The next step is to split the text into <span class="qterm">directional runs</span>. Each directional run is a sequence of characters with the same direction.</p>
<figure id="fig_directional_runs">
<img src="images/directional-runs.svg" alt="Splitting a text into 3 directional runs">
<figcaption>Splitting a text into 3 directional runs.</figcaption>
</figure>
<p>Inside each run, all the characters follow the same order. The runs themselves are ordered for visual representation from left to right or from right to left, depending on the base direction of the paragraph. <a href="#fig_order_of_directional_runs"></a> demonstrates an example of this. This is the first effect of the base direction.</p>
<figure id="fig_order_of_directional_runs">
<img src="images/order-of-directional-runs.svg" alt=
"The effect of base direction on the order of runs">
<figcaption>The effect of base direction on the order of runs.</figcaption>
</figure>
<p>Unicode has a <span class="qterm">bidi class</span> (or <span class="qterm">bidi
type</span>) property defined for each character that is used to determine the direction of each character. All the Arabic letters are marked as right-to-left characters, while Latin characters have the left-to-right category.</p>
<p>Some characters, mostly punctuation marks, are <span class="qterm">neutral</span>. The
direction of these characters is derived from their surrounding characters. If a neutral character is surrounded by characters of the same direction (e.g. a space surrounded by Arabic letters), it gets the direction of its neighbors. Otherwise (e.g. a space between an Arabic and a Latin letter, or a neutral character appearing at the start or the end of a paragraph), the neutral character gets its direction from the paragraph’s base direction. This is another effect of the base direction in the bidi algorithm.</p>
<p>The above explanation of the bidi algorithm is highly simplified, to convey only the
essentials of how Arabic text is transformed for rendering. The actual algorithm deals with many more character types and edge cases. Please refer to <a href=
"https://www.w3.org/International/articles/inline-bidi-markup/uba-basics">Unicode
Bidirectional Algorithm basics</a> [[UBA-BASICS]] for more information or <a href=
"https://www.unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a> [[UAX9]] for the official detailed documentation.</p>
</section>
</section>
</section>
<section id="h_shaping">
<h2>Glyph shaping &amp; positioning</h2>
<section id="h_writing_styles">
<h3>Fonts &amp; font styles</h3>
<p>Traditionally, the Arabic alphabet contains 28 letters, though for practical purposes it makes sense to include the hamzah, which increases the number of letters to 29. Numerous letters share the same skeleton and are only distinguished by diacritic signs. Since letters change according to their position in the word, Arabic fonts typically contain hundreds of glyphs. Depending on the design of the font, the inclusion of ligatures, stylistic alternates, contextually sensitive shapes, language localisation, punctuation, etc. can further expand the glyphset. Some contemporary fonts include many hundreds, in some cases even thousands of glyphs.</p>
<p>Early typefaces, some still in use today, incorporated design features based on a variety of simplifications. For example, one of the first approaches used a "typewriter" style, where the same glyph is used for different positions in a word. This is the case for initial and medial shapes for most letters. It is generally the browser default font for the Arabic script. A more unifying approach is the use of a single, detached glyph for each letter, without joining. Other approaches were used, producing visual results of more or less practicality.</p>
<p>Nowadays, there is a large choice of fonts, and one can choose the font that best suits one's typographical desire. However, one may also wish to take into account some non-typographical considerations like the following:</p>
<ul>
<li>Accessibility (readability and visibility).</li>
<li>Devices with a small screen (for example, larger loop and teeth height,
small descenders etc.), although fonts actually appear better on smartphones</li>
<li>Font style for titles, banners and the like (small number of words) may differ from the style for content text (long text).</li>
<li>Shapes and proportions (the size issue) in mixed texts.</li>
<li>Some fonts might give other opportunities for line justification than just the use of word spacing (See [[[#h_ligatures]]]).</li>
<li>etc.</li>
</ul>
<section id="h_the_islamic_manuscript_tradition">
<h3>The Islamic manuscript tradition</h3>
<section id="h_origins">
<h4>Origins</h4>
<p>The Arabic script belongs to the class of Semitic writing systems. It evolved from the Nabataean script, and attained its distinctive form by the 4th century CE. It is closely related to the Syriac and Hebrew scripts. The earliest attested document written in the Arabic alphabet in its classical form stems from the Islamic era; it is dated to 643 CE.</p>
<p>According to Islamic belief, the prophet Muhammad received his revelations in the Arabic language. As a consequence, Arabic attained religious connotations. Muhammad's revelations were first compiled and standardised in writing after his death in 632 CE under the caliphs Abu Bakr and 'Uthman. The resulting holy book, the Qur’ān, became a central vehicle of the faith. The Arabic script thus spread with Islamic civilization beyond its origins, and started to be used to write other languages. Because of the wide reach of Islam, numerous, widely differing languages were and are written with the Arabic script. As a result, different orthographies, including variant forms of Arabic letters were conceived. In parallel to these linguistic developments, a plethora of regional styles emerged, making the Islamic manuscript tradition one of the richest in the world.</p>
</section>
<section id="h_writing_style">
<h4>Writing Styles</h4>
<p>Islamic manuscript practice evolved with Islamic art and civilization. Extant evidence of decorative use of the Arabic script can be found as early as the 7th century CE, in a mosaic band in the Dome of the Rock. Beyond frequent monumental inscriptions in architecture, calligraphy became one of the foremost Islamic art forms. Manuscript practice evolved into a central element of Islamic civilization through the copying of books, and the refinement of calligraphy into a form of fine art. The commissioning and writing of the Qur’ān was seen as an expression of religious devotion and provided the context for scribes and calligraphers to excel in their craft. Further to its use as a textual medium, Arabic calligraphy and lettering crafts developed a rich palette of decorative forms and uses.</p>
<p>A range of styles evolved over time in different regions and for different purposes. These include monumental writing styles used in decorative and representational settings, styles used for the exclusive use of rulers, common hands in different areas, and styles dedicated for specialist purposes such as miniature writing. Many of these styles fell out of use, some evolved further, and some retained a high degree of consistency over time.</p>
<p>There is evidence that both monumental and current hands existed and evolved from the early Islamic era onwards. Various exponents of the monumental styles are often loosely referred to as Kūfī, making it a term that lacks precision and clarity. It describes a group of styles that are marked by an angularity and stark graphic characteristics that are absent from the round scripts that took preeminence from the late 10th, early 11th century CE.</p>
<p>The ‘Abbassīd calligrapher Ibn Muqlah (885/886–940 CE) is recognised for his contribution to the evolution of the round scripts. He articulated a system of proportional relations of Arabic letterforms, which became known as 'The Proportioned Script' (al-khatt al-mansūb), although its precise meaning remains open to interpretation. His work was continued by Ibn al-Bawwāb (d. 1022 CE), and later by Yāqūt al-Musta‘ṣimī (d. 1298 CE), who is credited with refining and canonising six writing styles into their definitive forms. The so-called 'Six Pens' (al-aqlām as-sitta) became the expression of an early high point of Islamic calligraphy. They are grouped in sister scripts of two, in which one is a large, the other a small hand, which share some characteristics. They are (large/small) Thuluth and Naskh, Muhaqqaq and Reyhān, and Tawqi‘ and Riqa‘, and they superseded all preceding styles.
</p>
<p>Other cultural centres of Islamic civilization emerged after the fall of the ‘Abbassīds, and developed their distinct calligraphic styles. Of note is the Persian domain, where the styles Ta‘līq and Nasta‘līq emerged and attained their classical form in the 14th century CE. The Ottoman school began with Sheikh Hamdullah in the late 15th century CE, and built on the Arabic and Persian predecessors. It refined existing styles and invented new hands, to achieve a new high point in the arts of Islamic penmanship.</p>
<p>Today, only a handful of styles are still widely in use, and known. The most prominent style is Naskh, which has become the default form of Arabic in most contexts, notably as a model for typography. However, there is strong regional variance. In Iran and Pakistan Nasta‘līq remains the preferred style for Persian and Urdu respectively, and in the Mashriq Ruq‘ah (not to be confused with Riqa‘) retains a prominent role in casual writing, as well as in lettering. In calligraphic art, other styles are still practised, where notably Thuluth is preeminent.</p>
<dl>
<dt>Kūfī (كوفي)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_abbassid_style">
<img style="width: 200px; height: 147px;" src="images/kufiExampleQuran.jpg" alt=
"Kūfī ‘Abbassīd style">
<figcaption>Kūfī ‘Abbassīd style example [<a href=
"https://commons.wikimedia.org/wiki/File:A_section_of_the_Koran_-_Google_Art_Project.jpg">Source</a>].</figcaption>
</figure>
<p>Kūfī is best understood as an umbrella term containing numerous variants, including widely diverging styles such as ornamental Kūfī, square Kūfī, and the so-called Eastern Kūfī, making the term highly ambiguous. The earliest forms of Kūfī are attested from the 7th century CE, making it one of the oldest Arabic writing styles. The Kūfī style that gained prominence in the production of Qur’ān manuscripts from the 7th century CE, also known as ‘Abbassīd style, is characterized by angular forms, with pronounced emphasis of horizontal strokes, very small or closed counter shapes, and uniformity of spacing.</p>
</dd>
<dt>Maghribī (مغربي)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_maghribi_style">
<img style="width: 170px; height: 177px;" src="images/maghribi2.jpg" alt=
"Maghribi script">
<figcaption>Maghribī example [<a href=
"https://commons.wikimedia.org/wiki/File:Maghribi_script_sura_5.jpg">Source</a>].</figcaption>
</figure>
<p>Like Kūfī, the denomination Maghribī (western) is a generic name that encompasses numerous variants. This class of styles probably evolved from the ‘Abbassīd style when Islamic conquests advanced through North Africa and into the Iberian peninsula in the 8th century CE. It maintained some structural characteristics from the ‘Abbassīd style and evolved further into a distinct regional hand. It was used for writing the Qur’ān as well as other scientific, legal and religious
manuscripts. <em>Rabat</em>, a <em><a href="#def_mabsut">mabsut</a></em> version of it, is widely used in some official printings in Morocco.</p>
</dd>
<dt>Thuluth (ثلث)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_thuluth_style">
<img style="width: 132px; height: 173px;" src="images/basmalahThuluth.png" alt=
"Thuluth script">
<figcaption>Thuluth example [<a href=
"https://commons.wikimedia.org/wiki/File:Basmalah-1wm.png">Source</a>].</figcaption>
</figure>
<p><em>Thuluth</em> is one of the first rounded styles and can be traced back to the late 10th century. Its name probably refers to the width of the pen, which was a third of the pen used for the ancient <em>Ṭūmār</em> style. Canonised as one of the six pens, over time <em>Thuluth</em> acquired preeminence in the Islamic calligraphic arts. It was mainly used for large, decorative applications such as titles, chapter headers, or monumental inscriptions.</p>
</dd>
<dt>Naskh (نسخ)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_naskh_style">
<img style="width: 147px; height: 166px;" src="images/naskhQuran2.png" alt=
"Naskh script">
<figcaption>Naskh example [<a href=
"https://commons.wikimedia.org/wiki/File:FirstSurahKoran_%28fragment%29.jpg">Source</a>].</figcaption>
</figure>
<p><em>Naskh</em> means ‘copying’, and it is the bookhand par excellence of the Arabic manuscript tradition. It emerged at the end of the 10th century, and developed into distinct regional schools from the 13th century. Over time Naskh became the predominant writing style for continuous text and superseded most earlier styles. When typography was adopted in the Arabic script world, <em>Naskh</em> formed the basis for most types intended for continuous reading.</p>
</dd>
<dt>Taʻlīq (تعليق)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_taliq_style">
<img style="width: 130px; height: 200px;" src="images/taliq.jpg" alt="Taʻlīq script">
<figcaption>Taʻlīq example [<a href=
"https://upload.wikimedia.org/wikipedia/commons/thumb/8/87/Ta'liq_script_1.jpg/389px-Ta'liq_script_1.jpg">Source</a>].</figcaption>
</figure>
<p><em>Taʻlīq</em> (hanging) is a Persian chancery style. It probably emerged from the older <em>tawqī‘</em> and its definitive form was established by the 13th century. As the name indicates, and owing to the pronounced inclination, it gives the impression of being suspended from above.</p>
</dd>
<dt>Nastaʻlīq (نستعلیق)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_nastaliq_style">
<img style="width: 130px; height: 130px;" src="images/nastaliq.jpg" alt=
"Nastaliq script">
<figcaption>Nastaʻlīq example [<a href=
"https://commons.wikimedia.org/wiki/File:Khatt-e_Nastaliq.jpg">Source</a>].</figcaption>
</figure>
<p>The name <em>nastaʻlīq</em> combines <em>naskh</em> and <em>taʻlīq</em>, which may indicate the two influences of this style. <em>Nastaʻlīq</em> became the archetypal writing style of the Persianate world, and attained its definitive form by the late 14th century, although earlier forms are attested. Like taʻlīq it gives a ‘hanging’ impression, and is marked by smooth, flowing curves. Although not widely used for continuous text, nastaʻlīq also found some use for short texts in Arabic.</p>
</dd>
<dt>Dīwānī (ديواني)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_diwani_style">
<img style="width: 250px; height: 101px;" src="images/diwani.png" alt=
"Diwani script">
<figcaption>Dīwānī example [<a href=
"https://commons.wikimedia.org/wiki/File:Izzet_44.png">Source</a>].</figcaption>
</figure>
<p><em>Dīwānī</em> was used at the Ottoman court (Dīwān) for official documents, making it a typical chancery script. It developed from the Persian <em>taʻlīq</em> and found use from the late 15th century. Its considerable complexity was intended to preclude forgeries of official documents.</p>
</dd>
<dt>Ruqʻah (رقعة)</dt>
<dd class="flexContainer">
<figure class="floatedFigure" id="fig_ruqah_style">
<img style="width: 129px; height: 168px;" src="images/Ruq_ah.gif" alt="Ruqʻa script">
<figcaption>Ruqʻa example [<a href=
"https://fa.wikipedia.org/wiki/%D9%BE%D8%B1%D9%88%D9%86%D8%AF%D9%87:Ruq_ah.gif">Source</a>].</figcaption>
</figure>
<p>The <em>Ruqʻah</em> style evolved from <em>Dīwānī</em> in the 18th century as a fast chancery hand in the Ottoman Empire. It should not be confused with the older <em>Riqā‘</em>, which is wholly different. <em>Ruqʻah</em> is still commonly used in the region of the Fertile Crescent as the preferred hand for everyday use. Its compact proportions give it a dark appearance, and its origins as a small, fast hand lend the shapes a graphic simplicity when enlarged, a feature that makes it popular in contemporary lettering.</p>
</dd>
</dl>
</section>
</section>
</section>
<section class="h_context">
<h3>Context-based shaping &amp; positioning</h3>
<p>Arabic script has some characteristics that are challenging for typographers and font designers. The examples below show some characteristics that require careful consideration. How, even in the simpler <span style="font-style: italic;">naskh</span> style, can typography, which came late to the Arabic world, follow the tradition of the many authors and artists who manually shaped the Arabic writing over decades?</p>
<section>
<h4>Multi-level baselines</h4>
<p>Letters may join through a finely inclined line.</p>
<figure id="fig_inclined_line">
<div style="margin-left: 40px;"><img style="width: 132px; height: 62px;" alt=
"slope baseline" src="images/yastabchiro.jpg"></div>
<figcaption>Inclining baseline.</figcaption>
</figure>
<p>Or they may appear on two, square-ended lines.</p>
<figure id="fig_square_ended_lines">
<div style="margin-left: 40px;"><img style="width: 92px; height: 79px;" alt=
"two-level baseline" src="images/yastami3o.jpg"></div>
<figcaption>Dual-level baseline.</figcaption>
</figure>
<p>Multilevel baselines don't occur in all fonts. The above examples use the Arabic Typesetting font. Compare those examples to more typical fonts:</p>
<figure id="fig_single_baselines">
<p style="margin-left: 40px;"><img style="width: 110px; height: 93px;" alt="normal Font" src="images/yastabchiroNormal.jpg"></p>
<figcaption>Flat baselines.</figcaption>
</figure>
</section>
<section>
<h4>Multi-context joining</h4>
<p>Rendering of letters depends not only on their place in the word (initial, medial, final) but also on their neighboring letters, i.e. the letter they join with. Each letter has a different appearance in each combination.</p>
<figure id="fig_forms_of_noon">
<img style="width: 324px; height: 79px;" alt="Different initial shape of noon" src="images/differentInitialNoon.jpg">
<figcaption>Initial letter noon, showing many different forms.</figcaption>
</figure>
<p>Fonts don't always comply with or respect this kind of <span class=
"qterm">tuning</span>. To do so, fonts need many glyphs in order to adapt to each
context. In more modern typefaces some of these connections are implemented by ligatures, but ligatures can't capture or cover all joining behavior.</p>
<p>In the two leftmost words, the initial noon differs in that one raises a kind of stroke. This property of raising a stroke is common for a number of letters (beh, teh, noon, theh) which are taller than their connected letters in order to be distinguished in some contexts, such as <img style="vertical-align: middle; width: 37px; height: 31px;" alt="Beh with stroke before seen" src="images/bsl.jpg"> vs. <img style="vertical-align: middle; width: 43px; height: 39px;" alt="Beh without stroke after seen" src="images/sbl.jpg">, or to resolve ambiguity. See also [[[#h_teeth_letters]]].</p>
</section>
<section>
<h4>Words as groups of letters</h4>
<p>A word shape is composed of not (only) a set of "horizontally" connected letters, but groups of letters (syntagmes).</p>
<p>[[[#fig_word_groups_1]]] shows syntagmes in two words in a naskh font with many glyph variants.</p>
<figure id="fig_word_groups_1">
<table>
<tbody>
<tr>
<td style="text-align: center; width: 40%;"><img style="width: 76px; height: 46px;"
alt="Aleph and two groups of letters to form a word" src="images/barmajaAmiri.jpg">
</td>
<td style="text-align: center;width:40%;"><img style="width: 127px; height: 57px;"
alt="two other groups of letters" src="images/stimrarihimaArabicTypesetting.jpg">
</td>
</tr>
</tbody>
</table>
<figcaption>Groups of letters are colored blue or red.</figcaption>
</figure>
<p>Compare that with the same words in a plainer font:</p>
<figure id="fig_word_groups_2">
<table>
<tbody>
<tr>
<td style="text-align: center; width:40%;"><img style="width: 98px; height: 43px;"
alt="same word in more normal font" src="images/barmajaDefault.png">
</td>
<td style="text-align: center; width: 40%;"><img style=
"width: 144px; height: 46px;" alt="same word in default font" src="images/stimrarihimaDefault.jpg">
</td>
</tr>
</tbody>
</table>
<figcaption>Here one can't really speak of letter groups, but rather of a "horizontal sequence of letters of almost the same width".</figcaption>
</figure>
<p>Group combinations cannot be covered by general or usual ligatures.</p>
</section>
<section>
<h4>Vertical joining</h4>
<p>Groups of letters may also join vertically (top down) instead of right to left. Not all fonts permit this.</p>
<figure id="fig_vertical_joining">
<table>
<tbody>
<tr>
<td style="text-align: center;"><img style=
"text-align: center; width: 65px; height: 70px;" alt="Vertical joining" src="images/vertivalJoin.jpg"></td>
<td style="text-align: center;"><img style=
"text-align: center; width: 69px; height: 46px;" alt="Horizontal joining" src="images/horizontalJoin.jpg"></td>
</tr>
</tbody>
</table>
<figcaption>Almost vertical joining (left), vs. horizontal joins (right).</figcaption>
</figure>
<p>Once again, some fonts try standard ligatures, but this is not a ligature. It is
rather a matter of (good) writing practice/style.</p>
<p>One should note that all these features have not only an aesthetic side, but also play a role in justification. Choosing a joining style to suit the desired line width is done at the discretion of authors for hand-written text. Applications should provide general rules to emulate these options, but achieving such justification requires sophisticated algorithms.</p>
</section>
<section id="h_teeth_letters">
<h4>The so-called teeth letters</h4>
<p>Where successive letters have a uniform medial shape, they can be rendered in a way that resembles teeth.</p>
<figure id="fig_teeth_letters">
<div><img style="width: 276px; height: 74px;" alt="Teeth letters" src="images/teeth.jpg"></div>
<figcaption>"Teeth" letters.</figcaption>
</figure>
<p>Individual letter shapes may vary according to the context. It's not always the same letters (in red)
which raise the stroke in [[[#fig_teeth_letters]]].</p>
</section>
<section id="h_ligatures">
<h4>Ligatures</h4>
<p>Almost all the writing styles of the Arabic script use a special shape when the letters
<span class="lettername">lam</span> and <span class="lettername">alef</span> are joined. Most Arabic fonts include mandatory ligatures for this combination. Ignoring this ligature, as shown in [[[#fig_laam_alef_ligature]]], leads to incorrectly rendered text.</p>
<figure id="fig_laam_alef_ligature">
<img src="images/laam-alef-ligature.png" alt="Correct and wrong ways of rendering letter lam followed by letter alef" style="width:60%">
<figcaption>
Correct and incorrect ways of rendering letter <span class="lettername">lam</span> followed by letter <span class="lettername">alef</span></figcaption>
</figure>
<p>This shape is not limited to the combination <span class="codepoint" translate="no"><span lang="ar" dir="rtl">لا</span> [<span class="uname">U+0644 ARABIC LETTER LAM</span> + <span class="uname">U+0627 ARABIC LETTER ALEF</span>]</span>. Variations of the letter <span class="lettername">alef</span> such as <span class="codepoint" translate="no"><span lang="ar" dir="rtl">آ</span> [<span class="uname">U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE</span>]</span> and <span class="codepoint" translate="no"><span lang="ar" dir="rtl">أ</span> [<span class="uname">U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE</span>]</span> when combined with the letter <span class="lettername">lam</span> follow the same rules. Combination with diacritics does not affect these ligatures. Each of these ligatures also provides a special shape for joining on its right side (to the preceding letter).</p>
</section>
<section id="h_diacritics">
<h4>Diacritics</h4>
<p>More than one diacritic can occur after a single base character and all of them should be visually attached to the same character. Font files usually define special shapes or positioning for combinations of diacritics. This extra information should be applied when rendering text.</p>
<p>[[[#fig_combining_diacritics]]] shows an example, where, according to this
font’s specification, combining U+0651 ARABIC SHADDA and U+0650 ARABIC KASRA changes their positions. Various font files may require different transformations.</p>
<figure id="fig_combining_diacritics">
<img src="images/combining-diacritics.png" alt="Diacritics could be combined in Arabic script." style="width:60%">
<figcaption>Diacritics could be combined in Arabic script.</figcaption>
</figure>
</section>
<section id="h_diacritic_position">
<h4>Positioning diacritics relative to base characters</h4>
<p>In Arabic script text it is unusual to use diacritics for vowel information and for
consonant lengthening. If they are used, however, there are different approaches to their placement relative to the base characters they modify. Some fonts display short vowel diacritics at the same height, while others vary the height according to the base
character.</p>
<p>Another potential difference arises when a short i vowel diacritic is used with a shadda. In some cases the vowel diacritic remains below the base letter, whereas in other cases the vowel diacritic appears above the base letter, but under the shadda (so that it can be distinguished from the short a vowel diacritic, which appears above the shadda).</p>
</section>
</section>
<section class="h_cursive">
<h3>Cursive text</h3>
<p>Arabic script is a cursive writing system; i.e., letters can join to their neighboring letters. Besides the core behavior of the script, there are some details on how content is encoded in Unicode, and some rules around joining behavior when rendering special cases.</p>
<section id="h_joining_forms">
<h4>Joining Forms</h4>
<p>Every Arabic letter has one, two, or four different joining forms, which allow the
letter to join to its neighbors, if applicable. These four forms are:</p>
<ul>
<li><dfn data-lt="isolated">Isolated form</dfn>, used when the letter does not join to
any of the surrounding letters;</li>
<li><dfn data-lt="initial">Initial form</dfn>, used when the letter is joining only to
its next (left-hand side) letter;</li>
<li><dfn data-lt="medial">Medial form</dfn>, used when the letter is joining on both
sides, and</li>
<li><dfn data-lt="final">Final form</dfn>, used when the letter is joined only to its
previous (right-hand side) letter.</li>
</ul>
<p>[[[#fig_letter_meem_shapes]]] shows samples of all four joining forms for <span class="uname">U+0645 ARABIC LETTER MEEM</span> (<span dir="rtl" lang=
"ar">م</span>).</p>
<figure id="fig_letter_meem_shapes">
<img height="140" src="images/drawings/joining/joining-meem-isolated.png" alt=
"Isolated joining form of U+0645 ARABIC LETTER MEEM." style="width:10em">
<img height="140" src="images/drawings/joining/joining-meem-final.png" alt=
"Final joining form of U+0645 ARABIC LETTER MEEM." style="width:10em">
<img height="140" src="images/drawings/joining/joining-meem-medial.png" alt=
"Medial joining form of U+0645 ARABIC LETTER MEEM." style="width:10em">
<img height="140" src="images/drawings/joining/joining-meem-initial.png" alt=
"Initial joining form of U+0645 ARABIC LETTER MEEM." style="width:10em">
<figcaption>Four different letter forms for joining to surrounding letters.</figcaption>
</figure>
<p>We define the following two groups of joining forms:</p>
<ul>
<li><dfn data-lt="join-to-left">Join-to-left forms</dfn>: either Initial form or Medial
form of a letter, which joins to the left-hand side (next) letter. Other forms are called <dfn data-lt="non-join-to-left form">non-join-to-left</dfn>.</li>
<li><dfn data-lt="join-to-right">Join-to-right forms</dfn>: either Medial form or Final
form of a letter, which joins to the right-hand side (previous) letter. Other forms are
called <dfn data-lt="non-join-to-right form">non-join-to-right</dfn>.</li>
</ul>
</section>
<section id="h_joining_categories">
<h4>Joining Categories</h4>
<p>There are different categories of letters based on their joining behavior:</p>
<ul>
<li><dfn data-lt="dual-joining">Dual-joining letters</dfn>: can join from both sides, like the letter in <a href="#fig_letter_meem_shapes"></a>, and have all four shapes mentioned above.</li>
<li>
<dfn data-lt="right-joining">Right-joining letters</dfn>: can only join to their
previous (right-hand side) letter, and therefore, only have <a>isolated</a> and
<a>final</a> shapes. <a href="#fig_letter_reh_shapes"></a> shows samples of both
forms for U+0631 ARABIC LETTER REH (ر).</li>
<li>
<dfn data-lt="non-joining">Non-joining letters</dfn>: cannot join to any surrounding
letter, and therefore can only take the <a>isolated</a> form. <a href=
"#fig_letter_hamzah_shape"></a> shows a sample of U+0621 ARABIC LETTER HAMZA (ء) in
its only possible form.</li>
</ul>
<figure id="fig_letter_reh_shapes">
<img height="140" src="images/drawings/joining/joining-reh-isolated.png" alt=
"Isolated joining form of U+0631 ARABIC LETTER REH." style="width:10em">
<img height="140" src="images/drawings/joining/joining-reh-final.png" alt=
"Final joining form of U+0631 ARABIC LETTER REH." style="width:10em">
<figcaption><a>Right-joining letters</a> only have two forms: <a>final</a> and <a>isolated</a>.</figcaption>
</figure>
<p>Most Arabic letters are either <a>dual-joining</a> or <a>right-joining</a>.</p>
<figure id="fig_letter_hamzah_shape">
<img height="140" src="images/drawings/joining/joining-hamza.png" alt=
"One joining form of U+0621 ARABIC LETTER HAMZA." style="width:10em">
<figcaption><a>Non-Joining letters</a> only have one form: <a>isolated</a>.</figcaption>
</figure>
</section>
<section id="h_joining_rules">
<h4>Joining Rules</h4>
<p>There are core rules on how letters join to each other in the Arabic script, which stay valid regardless of the medium (hand-writing, typewriter, movable-type, digital, etc.):</p>
<ol>
<li id="joining_rule_1">Letters of each word join together whenever possible,
implicitly.</li>
<li id="joining_rule_2">In some languages, like Persian and Urdu, there are words—mostly, but not limited to, compound words—that require explicit breaks in the joining of letters, although joining would otherwise be possible.</li>
<li id="joining_rule_3">In certain cases, a letter can be in a <a>join-to-left</a> form
without actually connecting to anything on the left, whether there’s any letter or not.
This is often seen in list counters, abbreviations, and other cases where letters do not have a word context, or are taken out of their original word context.</li>
<li id="joining_rule_4">In rare cases where a word is split at a point where letters are joined, the first letter of the second half will be in a <a>join-to-right</a> form without any previous letter. This behavior is limited to special cases like blanking specific letters of a word, line breaks in a paragraph, and word breaks across poetry verses. No standalone word can have any letters in <a>join-to-right</a> form without joining on the right-hand side.</li>
</ol>
<p>[[[#fig_joining_process]]] demonstrates how letters join (per Joining Rule
1) to form a word.</p>
<figure id="fig_joining_process">
<img src="images/joining-process.png" alt="Letter BEH and MEEM join to form a word." style="width:60%">
<figcaption>Letters join by taking their relevant form.</figcaption>
</figure>
</section>
<section id="h_joining_control">
<h4>Joining Control</h4>
<p>Arabic letters are represented in their intended joining forms in hand-writing,
typewriters, and old (deprecated) digital encodings of the script. In Unicode, letters are encoded semantically—meaning without any information about their joining form—and therefore there is a need for a mechanism for controlling the joining behavior of the letters.</p>
<p>In Unicode, by default, neighboring Arabic letters join together if and only if both
letters are able to join towards the other.</p>
<section id="h_disjoining_enforcement">
<h5>Disjoining Enforcement</h5>
<p>As noted in Joining Rule 2, sometimes two Arabic letters sit next to each other (in
one word) which would normally join together, but should not. In Unicode, for such a
case, a special character should be used to enforce disjoining of these letters. This
character is called <span class="uname">U+200C ZERO WIDTH NON-JOINER</span>, or
<dfn>ZWNJ</dfn> for short.</p>
<figure id="fig_disjoining_enforcement">
<img height="140" src="images/drawings/joining/joining-beh-yeh-zwnj-beh-yeh.png" alt=
"ZWNJ example." style="width:20em">
<figcaption>Example of using <a>ZWNJ</a> for <a href="#h_disjoining_enforcement">disjoining enforcement</a>.</figcaption>
</figure>
</section>
<section id="h_joining_enforcement">
<h5>Joining Enforcement</h5>
<p>Similarly, as noted in Joining Rules 3 and 4, sometimes an Arabic letter needs to take a
joining form when this would not happen normally. For example, some abbreviation methods use the Initial Form of letters, when possible, for every letter in the abbreviation. Again, in Unicode, a special character should be used to enforce joining on this letter. This character is called <span class="uname">U+200D ZERO WIDTH JOINER</span>, or <dfn>ZWJ</dfn> for short.</p>
<p>Besides <a>ZWJ</a>, there’s another special Unicode character, <span class=
"uname">U+0640 ARABIC TATWEEL</span>, which enforces joining behavior (join causing) on
letters next to it. But, in contrast to <a>ZWJ</a>, <dfn>TATWEEL</dfn> has a glyph shape, looking like a hyphen and usually as wide as the SPACE glyph, which connects to the letters on the main joining line (a.k.a. base-line). So, using <a>TATWEEL</a> would give a similar Joining Enforcement behavior, but has a side effect of wider length for the letter, which is not always desired. That’s why it’s highly recommended to only use
<a>ZWJ</a> for joining control.</p>
<figure id="fig_joining_enforcement">
<img height="140"
src="images/drawings/joining/joining-heh-zwj-fullstop-sheh-fullstop.png"
alt="ZWJ example." style="width:25em">
<img height="140"
src="images/drawings/joining/joining-heh-tatweel-fullstop-sheh-fullstop.png"
alt="TATWEEL example." style="width:30em">
<figcaption>Example of using <a>ZWJ</a> (recommended) and <a>TATWEEL</a> (not recommended) for <a href="#h_joining_enforcement">joining enforcement</a>.
</figcaption>
</figure>
<p>In Unicode, <a>ZWNJ</a> and <a>ZWJ</a> are called <dfn>Joining Control Characters</dfn>.</p>
</section>
<section id="h_joining_disjoining_enforcement">
<h5>Joining-Disjoining Enforcement</h5>
<p>The two enforcement methods mentioned above can be combined to form a
<dfn>Joining-Disjoining Enforcement</dfn> method, that enables <a href=
"#joining_rule_3">Joining Rule 3</a> for cases when there’s a <a>dual-joining</a>/<a>right-joining</a> letter after a <a>join-to-left</a> letter, which
should not be joined to its previous letter.</p>
<figure id="fig_joining_disjoining_enforcement">
<img height="140" src="images/drawings/joining/joining-heh-zwj-zwnj-sheh.png"
alt="ZWJ+ZWNJ example." style="width:20em">
<figcaption>Example of using <span class="qterm">&lt;ZWJ, ZWNJ&gt;</span> for
<a>joining-disjoining enforcement</a>.</figcaption>
</figure>
</section>