-
Notifications
You must be signed in to change notification settings - Fork 286
/
zcl_demo_abap_regex.clas.abap
1118 lines (807 loc) · 45 KB
/
zcl_demo_abap_regex.clas.abap
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"! <p class="shorttext"><strong>Regular Expressions in ABAP</strong><br/>ABAP cheat sheet example class</p>
"!
"! <p>The example class demonstrates regular expressions in ABAP.<br/>
"! Choose F9 in ADT to run the class.</p>
"!
"! <h2>Information</h2>
"! <p>Find information on getting started with the example class and the disclaimer in
"! the ABAP Doc comment of class {@link zcl_demo_abap_aux}.</p>
CLASS zcl_demo_abap_regex DEFINITION
PUBLIC
FINAL
CREATE PUBLIC .
PUBLIC SECTION.
INTERFACES if_oo_adt_classrun.
INTERFACES if_abap_matcher_callout.
PROTECTED SECTION.
PRIVATE SECTION.
CLASS-DATA callout TYPE i.
CLASS-DATA callout_tab TYPE string_table.
ENDCLASS.
CLASS zcl_demo_abap_regex IMPLEMENTATION.
METHOD if_oo_adt_classrun~main.
out->write( |ABAP cheat sheet example: Regular Expressions in ABAP\n\n| ).
out->write( `1) Characters and Character Types` ).
DATA string_chars_types TYPE string.
"Specific character
string_chars_types = replace( val = `abcdef` pcre = `a` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character except a line break
string_chars_types = replace( val = |ab 1#?C\n D23| pcre = `.` with = `_` occ = 0 ).
out->write( string_chars_types ).
"Any digit
string_chars_types = replace( val = `a1-b2 3-4c9` pcre = `\d` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any non-digit
string_chars_types = replace( val = `a1-b2 3-4c9` pcre = `\D` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any whitespace character such as a blank, tab and new line
string_chars_types = replace( val = |ab cd\tef\ngh| pcre = `\s` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character that is not a
string_chars_types = replace( val = |ab cd\tef\ngh| pcre = `\S` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any word character (letter, digit or the underscore)
string_chars_types = replace( val = `(ab 12_c)` pcre = `\w` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character that is not a word character
string_chars_types = replace( val = `(ab 12_c)` pcre = `\W` with = `#` occ = 0 ).
out->write( string_chars_types ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `2) Escaped Characters and Special Variants` ) ).
DATA string_esc_chars TYPE string.
"Special characters
string_esc_chars = replace( val = `a[b]c\d/e^f.g` pcre = `\[|\]|\\|\/|\^|\.` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Line feeds
string_esc_chars = replace( val = |a\nb\nc| pcre = `\n` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Line feed negation
string_esc_chars = replace( val = |a\nb\nc| pcre = `\N` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Tabs
string_esc_chars = replace( val = |a\tb\tc| pcre = `\t` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Carriage return
string_esc_chars = replace( val = |d\re\rf| pcre = `\r` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Characters with hex code
"The example string includes a non-breaking space.
string_esc_chars = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `\x{00A0}` with = `x` occ = 0 ).
out->write( string_esc_chars ).
"Characters with Unicode code point
"As above, the example string includes a non-breaking space.
"The PCRE syntax uses the control verb (*UTF) to enable UTF mode.
string_esc_chars = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `(*UTF)\N{U+00A0}` with = `y` occ = 0 ).
out->write( string_esc_chars ).
"Characters with a specified Unicode character property
string_esc_chars = replace( val = `Hello ABAP` pcre = `\p{Ll}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\p{Lu}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\P{Ll}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\P{Lu}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `3) Quantifiers, Repetitions and Alternatives` ) ).
DATA string_quan_rep_alt TYPE string.
"Zero or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab*` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"One or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab+` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Between x and y repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{2,3}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Exactly n repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{3}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Exactly n or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{2,}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Optional matches
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Alternative matches
string_quan_rep_alt = replace( val = `azbycxdwevfu` pcre = `a|b|c` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Capturing non-greedily (zero or more repetitions)
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc*?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
string_quan_rep_alt = replace( val = `abcxdefghxi` pcre = `\A.*?x` with = `_` ).
out->write( string_quan_rep_alt ).
"Example to compare with greedy capturing
string_quan_rep_alt = replace( val = `abcxdefghxi` pcre = `\A.*x` with = `_` ).
out->write( string_quan_rep_alt ).
"Capturing non-greedily (one or more repetitions)
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc+?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Example to compare with greedy capturing
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc+` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
string_quan_rep_alt = replace( val = `<span>Hallo</span>` pcre = `<.+?>` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `4) Character Sets and Ranges` ) ).
DATA string_char_sets_ranges TYPE string.
"Defining a character set
string_char_sets_ranges = replace( val = `bit bat but bet` pcre = `b[iu]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Defining a character range
string_char_sets_ranges = replace( val = `aa1 ab2 ba3 cac4 da56 a7` pcre = `a[a-c0-5]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Matching any single character not present in the list
string_char_sets_ranges = replace( val = `ABap` pcre = `[^Ap]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Matching any single character not within the range
string_char_sets_ranges = replace( val = `ABCDabcd123456` pcre = `[^A-Ca-c1-4]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `5) Anchors and Positions` ) ).
DATA string_anchors_pos TYPE string.
"Start of subject, syntax \A
string_anchors_pos = replace( val = `abc def` pcre = `\A` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Start of subject, syntax ^; find more information below regarding multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `(?m)^` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"The following examples uses ^ without enabling the multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `^` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"End of subject, syntax \Z
string_anchors_pos = replace( val = `abc def` pcre = `\Z` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"End of subject, syntax $
"The example uses multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `(?m)$` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Start or end of word
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\ba.` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\Dd\b` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\b.d\b` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Not at the start or end of words
string_anchors_pos = replace( val = `see an elefant` pcre = `\Be\B` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Resetting the starting point of a match
string_anchors_pos = replace( val = `abcd` pcre = `a.\Kc` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `6) Capturing Groups, Replacements and Backreferences` ) ).
DATA string_capt_group TYPE string.
string_capt_group = replace( val = `bit bat but bet` pcre = `b(a|u)t` with = `#` occ = 0 ).
out->write( string_capt_group ).
"------- Replacements with capturing groups -------
string_capt_group = replace( val = `APAB` pcre = `(..)(..)` with = `$2$1` ).
out->write( string_capt_group ).
"$0 representing the content of the whole match
string_capt_group = replace( val = `abcd` pcre = `(..)(..)` with = `#$0#` ).
out->write( string_capt_group ).
"Alternative replacement syntax with the curly brackets
string_capt_group = replace( val = `APAB` pcre = `(..)(..)` with = `${2}${1}` ).
out->write( string_capt_group ).
"------- Named capturing groups -------
"PCRE syntax (?<name>...), replacement syntax $name
string_capt_group = replace( val = xco_cp=>sy->date( )->as( xco_cp_time=>format->abap )->value
pcre = `(?<yr>\d{4})(?<mo>\d{2})(?<d>\d{2})`
with = `Day: $d, month: $mo, year: $yr` ).
out->write( string_capt_group ).
"Alternative PCRE syntax (?'name'...), replacement syntax ${name}
string_capt_group = replace( val = xco_cp=>sy->date( )->as( xco_cp_time=>format->abap )->value
pcre = `(?'yr'\d{4})(?'mo'\d{2})(?'d'\d{2})`
with = `${d}.${mo}.${yr}` ).
out->write( string_capt_group ).
"Creating a group but not capturing it
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$1#` ).
out->write( string_capt_group ).
"The following example raises an exception. $2 cannot be referred to.
TRY.
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$1#$2#` ).
CATCH cx_sy_invalid_regex_format INTO DATA(error).
out->write( error->get_text( ) ).
ENDTRY.
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$0#` ).
out->write( string_capt_group ).
"------- Back reference -------
"In the example, the capturing group 1 holds `ab`. The second capturing group captures
"all word characters until `ab` is found, including `ab`. The reference to the first
"capturing group is made using \1.
string_capt_group = replace( val = `abcdefabghij` pcre = `(a.)(\w*)\1` with = `#` ).
out->write( string_capt_group ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `7) Lookarounds` ) ).
DATA string_look_arounds TYPE string.
"Positive lookahead
string_look_arounds = replace( val = `abc ade` pcre = `a(?=b)` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Negative lookahead
string_look_arounds = replace( val = `abc ade` pcre = `a(?!b)` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Positive lookbehind
string_look_arounds = replace( val = `ab c abcd` pcre = `(?<=\s)c` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Negative lookbehind
string_look_arounds = replace( val = `ab c abcd` pcre = `(?<!\s)c` with = `#` occ = 0 ).
out->write( string_look_arounds ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `8) Case Conversions in Replacement Patterns` ) ).
DATA string_case_conv TYPE string.
"\u syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(.*)` with = `c\u$1` ).
out->write( string_case_conv ).
"\U syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(.*)` with = `c\U$1` ).
out->write( string_case_conv ).
"\l syntax
string_case_conv = replace( val = `HIJKLMNO` pcre = `K(.*)` with = `K\l$1` ).
out->write( string_case_conv ).
"\L syntax
string_case_conv = replace( val = `HIJKLMNO` pcre = `K(.*)` with = `K\L$1` ).
out->write( string_case_conv ).
"\E syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(..)(..)` with = `c\U$1\E$2` ).
out->write( string_case_conv ).
"The following example is a comparison to similar syntax without specifying \E
string_case_conv = replace( val = `abcdefg` pcre = `c(..)(..)` with = `c\U$1$2` ).
out->write( string_case_conv ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `9) Setting Options and Control Verbs` ) ).
DATA string_opt_set TYPE string.
"----------- Examples with `.` and single line mode -----------
"`.` matches new lines feeds when the single line option is set
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"Result (11 replacements including the new line feeds):
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?s).` with = `#` occ = 0 ).
out->write( string_opt_set ).
string_opt_set = replace( val = |\n\n| pcre = `(?s).` with = `#` occ = 0 ).
out->write( string_opt_set ).
"----------- Examples for the start of subjects -----------
"Multi-line mode not enabled
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `^.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"Multi-line mode enabled
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m)^.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"\A for comparison
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m)\A.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"----------- Examples for the end of subjects -----------
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `.$` with = `#` occ = 0 ).
out->write( string_opt_set ).
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m).$` with = `#` occ = 0 ).
out->write( string_opt_set ).
"\Z syntax (the result is the same as with `.$`)
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m).\Z` with = `#` occ = 0 ).
out->write( string_opt_set ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `10) Extended Mode` ) ).
DATA some_string TYPE string.
"Extended mode is enabled by default
"No replacement in the example.
some_string = replace( val = `abc def` pcre = `abc def` with = `#` ).
out->write( some_string ).
"The following example works because there is no whitespace in the source string.
"The whitespace in the regular expression is ignored.
some_string = replace( val = `abcdef` pcre = `abc def` with = `#` ).
out->write( some_string ).
"To match whitespace characters, you can use the pattern \s
some_string = replace( val = `abc def` pcre = `abc\sdef` with = `#` ).
out->write( some_string ).
"Escaping a whitespace using \
some_string = replace( val = `abc def` pcre = `abc\ def` with = `#` ).
out->write( some_string ).
"The following example also performs the replacement as the unescaped whitespaces
"are ignored.
some_string = replace( val = `abc def` pcre = `abc\ def` with = `#` ).
out->write( some_string ).
"Disabling the extended mode so that whitespaces are not ignored
some_string = replace( val = `abc def` pcre = `(?-x)abc def` with = `#` ).
out->write( some_string ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `11) Control verbs` ) ).
DATA ctrl_verb_string TYPE string.
"Using (*UTF); the source string contains a non-breakable space that is to be replaced
ctrl_verb_string = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `(*UTF)\N{U+00A0}` with = `x` occ = 0 ).
out->write( ctrl_verb_string ).
"Line breaks
"The results are demonstrated using string templates.
"In the examples, the multi-line mode is enabled in addition.
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*CR)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*LF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*CRLF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*ANYCRLF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `12) Callouts` ) ).
"The following example shows how to use callouts to call an ABAP method from a PCRE
"regular expression. It creates an object-oriented representation of a PCRE regex using
"the CL_ABAP_REGEX class, applying different PCRE syntaxes for callout specifications.
"The class implements the IF_ABAP_MATCHER_CALLOUT interface, and the callout method uses
"a demo class instance as the callout handler. If the regex matches, the method is called
"for each callout position. The callout method populates a string table with accessible
"details. This demonstrates that regex processing can be influenced, and in the example,
"processing stops when a condition is met. As a result, found is false because the regex
"is not fully processed. For more details, refer to the class documentation.
DATA(text_to_search) = `abcdefghijklmnopq`.
DATA(regex) = cl_abap_regex=>create_pcre( pattern = `(...)(?C1)(..)(?C2)(....)(?C3)(.)(?C"D")(....)(?C"E")(...)(?C"F")` ).
DATA(matcher) = regex->create_matcher( text = text_to_search ).
DATA(handler) = NEW zcl_demo_abap_regex( ).
matcher->set_callout( handler ).
DATA(found) = matcher->match( ).
IF found = abap_false.
out->write( |Pattern not found.\n\n| ).
ENDIF.
out->write( callout_tab ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `13) Conditional Patterns` ) ).
DATA string_cond_pattern TYPE string.
"------- Conditions in the search pattern -------
"Example pattern:
"- The first capturing group matches a digit.
"- The '?' character specifies that the capturing group participates
" in the match optionally.
"- A condition is specified, introduced by '(?...' and referring to
" the first capturing group (the digit) indicated by 1.
"- If the condition is met, i.e. a digit is found, then the yes pattern
" is applied ('.pdf'), which participates in the match.
"- If the condition is not met, i.e. a digit is not found, then the no
" pattern is applied ('.md'), which participates in the match.
"- As a result, '2.pdf' and the two '.md' occurrences (also the one
" without the digit) are replaced.
string_cond_pattern = replace( val = `1.txt,a.txt,2.pdf,b.pdf,3.gif,c.gif,4.md,d.md,5.jpg,e.jpg`
pcre = `(\d)?(?(1)\.pdf|\.md)` with = `#` occ = 0 ).
out->write( string_cond_pattern ).
"------- Conditions in the replacement pattern -------
"Search pattern:
"- The '?' character specifies that the capturing group participates in the match optionally.
"Replacements pattern:
"- '$' followed by the specification of the capturing group that is referred to in curly brackets
"- The capturing group number is followed by a ':' and '+...' that denotes the value if true,
" the value following the other colon denotes the value if false
string_cond_pattern = replace( val = `correct` pcre = `(in)?correct` with = `This is ${1:+not:very} good.` ).
out->write( string_cond_pattern ).
string_cond_pattern = replace( val = `incorrect` pcre = `(in)?correct` with = `This is ${1:+not:very} good.` ).
out->write( string_cond_pattern ).
"Syntax {n:-default}
"Shortcut for the PCRE replacement pattern by specifying
"a default if the capture group does not participate in the match
"'ab' participates in the match, so the default replacement pattern is not
"applied
string_cond_pattern = replace( val = `abcd` pcre = `(ab)?cd` with = `${1:-yz}cd` ).
out->write( string_cond_pattern ).
"'ab' does not participate in the match, so the default replacement pattern is
"applied
string_cond_pattern = replace( val = `cd` pcre = `(ab)?cd` with = `${1:-yz}cd` ).
out->write( string_cond_pattern ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `14) ABAP Statements Using Regular Expressions` ) ).
"-----------------------------------------------------------------------------
"------------------------------- FIND ----------------------------------------
"-----------------------------------------------------------------------------
DATA(str) = `Cathy's black cat on the mat played with Matt.`.
"----- ALL OCCURRENCES addition: Finding all occurrences -----
"The MATCH COUNT addition stores the number of occurrences in a data object.
"Determining the number of letters in a string
FIND ALL OCCURRENCES OF PCRE `[A-Za-z]` IN str MATCH COUNT DATA(a).
out->write( a ).
"----- RESULTS addition: Finding all occurrences -----
"The example also uses the ALL OCCURRENCES addition. The findings are
"stored in a table of type match_result_tab.
FIND ALL OCCURRENCES OF PCRE `\s` IN str RESULTS DATA(b).
out->write( b ).
"----- SUBMATCHES addition: Storing capturing group content in variables -----
"Pattern: anything before and after ' on '
FIND PCRE `(.*)\son\s(.*)` IN str IGNORING CASE SUBMATCHES DATA(c) DATA(d).
out->write( c ).
out->write( d ).
"Using the RESULTS addition to get offset and length information of
"all capturing groups, among others
FIND PCRE `(.*)\son\s(.*)` IN str IGNORING CASE RESULTS DATA(e).
out->write( e ).
"----- MATCH OFFSET/LENGTH additions -----
"The following examples find the last occurrence of a comma and the following content.
"\K denotes that the comma is excluded from the result. Using the offset and length
"values, the part of the string is extracted. The examples underscore that you can
"achieve the same thing differently with different PCRE syntax patterns (which also
"applies to ABAP demonstrated with the use of the substring function and specifying
"the syntax string+off(len) to extract the substring).
str = `abc,def,ghi,jkl,mno,pqr,stu,vwx,yz`.
"Negative lookahead
FIND PCRE `,(?!.*,)\K.*` IN str MATCH OFFSET DATA(off_a) MATCH LENGTH DATA(len_a).
DATA(content_a) = substring( val = str off = off_a len = len_a ).
out->write( off_a ).
out->write( len_a ).
out->write( content_a ).
"Positive lookahead
FIND PCRE `,(?=[^,]*$)\K.*` IN str MATCH OFFSET DATA(off_b) MATCH LENGTH DATA(len_b).
DATA(content_b) = str+off_b(len_b).
out->write( off_b ).
out->write( len_b ).
out->write( content_b ).
ASSERT content_a = `yz`.
ASSERT content_b = content_a.
"----- IN TABLE addition: Searching in internal tables -----
"The internal table must have a character-like line type.
"Searching in an internal table and retrieving line, offset, length information
"The example only searches for the first occurrence. See another example below
"that uses the ALL OCCURRENCES and RESULTS additions.
DATA(itab) = VALUE string_table( ( `Cathy's black cat on the mat played with the friend of Matt.` ) ).
"Pattern: 't' at the beginning of a word followed by another character
FIND FIRST OCCURRENCE OF PCRE `\bt.` IN TABLE itab
IGNORING CASE MATCH LINE DATA(f) MATCH OFFSET DATA(g) MATCH LENGTH DATA(h).
out->write( f ).
out->write( g ).
out->write( h ).
"----- Searching in internal tables -----
"The RESULTS addition stores findings in an internal table of type match_result_tab when
"ALL OCCURRENCES is used.
"Submatches (i.e. length and offset values of the submatches) are stored in internal
"tables themselves. Therefore, the example uses nested loops and the substring function
"to retrieve the strings.
"The objective of the following example is to extract the content of the segments that
"are positioned within /.../ in a URL. The segments are stored in an internal table.
DATA(url) = `https://help.sap.com/docs/abap-cloud/abap-concepts/controlled-sap-luw/`.
DATA url_parts TYPE string_table.
FIND ALL OCCURRENCES OF PCRE `(?<=\/)([^\/]+)(?=\/)` IN url RESULTS DATA(res).
out->write( res ).
"Details on the regular expression:
"- Positive lookbehind (?<=\/) that determines that the content is preceded by `/`
"- Positive lookahead (?=\/) that determines that the content is followed by `/
"- ([^\/]+) in between determines that any sequence of characters that are not `/` are matched
"- The match is put in parentheses to store the submatch
LOOP AT res INTO DATA(finding).
LOOP AT finding-submatches INTO DATA(sub).
DATA(url_part) = substring( val = url off = sub-offset len = sub-length ).
APPEND url_part TO url_parts.
ENDLOOP.
ENDLOOP.
out->write( url_parts ).
"The following statement uses nested iteration expressions with FOR instead of nested
"LOOP statements.
DATA(url_parts_for_loop) = VALUE string_table( FOR wa1 IN res
FOR wa2 IN wa1-submatches
( substring( val = url off = wa2-offset len = wa2-length ) ) ).
ASSERT url_parts = url_parts_for_loop.
"-----------------------------------------------------------------------------
"------------------------------- REPLACE -------------------------------------
"-----------------------------------------------------------------------------
DATA(str_replace) = `ab apppc app`.
DATA(str_replace_copy) = str_replace.
"Changing the source field directly with a REPLACE statement; same as above
REPLACE PCRE `(.*?)PP(.*)` IN str_replace WITH `$2#$1` IGNORING CASE.
out->write( str_replace ).
str_replace = str_replace_copy.
"ALL OCCURRENCES addition
REPLACE ALL OCCURRENCES OF PCRE `\s` IN str_replace WITH `#`.
out->write( str_replace ).
str_replace = str_replace_copy.
REPLACE ALL OCCURRENCES OF PCRE `p.` IN str_replace WITH `#`
REPLACEMENT COUNT DATA(repl_cnt) "3
RESULTS DATA(repl_res).
out->write( repl_cnt ).
out->write( repl_res ).
DATA(str_table_original) = VALUE string_table( ( `a1bc2` ) ( `d3ef` ) ( `4ghi` ) ( `jkl` ) ).
DATA(str_table) = str_table_original.
"Replacing all occurrences in a table
"RESULTS addition: Storing information in an internal table of type repl_result_tab
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
WITH `#`
RESULTS DATA(res_table).
out->write( res_table ).
str_table = str_table_original.
"Replacing the first occurrence in a table
"RESULTS addition: Storing information in a structure of type repl_result
REPLACE FIRST OCCURRENCE OF PCRE `\d`
IN TABLE str_table
WITH `#`
RESULTS DATA(res_structure).
out->write( res_structure ).
str_table = str_table_original.
"Restricting the search range in an internal table
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
FROM 1 TO 2
WITH `#`.
out->write( str_table ).
str_table = str_table_original.
"Offsets can be optionally specified (also only the offset of start or end line possible)
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
FROM 1 OFFSET 3 TO 2 OFFSET 2
WITH `#`.
out->write( str_table ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `15) Built-In Functions in ABAP Using Regular Expressions` ) ).
DATA(text) = `Pieces of cakes.`.
"---------------- find ----------------
"The find function searches for the subtexting specified and returns the offset
DATA(find) = find( val = text pcre = `\.` ).
out->write( find ).
"---------------- find_end ----------------
"find_end returns the sum of the offset of the occurrence plus the length of the match
DATA(find_end) = find_end( val = text pcre = `\s` ).
out->write( find_end ).
"---------------- count ----------------
DATA(count_a) = count( val = text pcre = `\s` ).
out->write( count_a ).
DATA(count_b) = count( val = text pcre = `.` ).
out->write( count_b ).
"---------------- match ----------------
DATA(match_a) = match( val = `The email address is [email protected].`
pcre = `\w+(\.\w+)*@(\w+\.)+(\w{2,4})` ).
out->write( match_a ).
"Find blank (without inlcuding it in the result indicated by \K) and
"the following 2 characters, second occurrence
DATA(match_b) = match( val = `The email address is [email protected].`
pcre = `\s\K..`
occ = 2 ).
out->write( match_b ).
"---------------- replace ----------------
DATA(replace_a) = replace( val = text pcre = `\s` with = `#` ).
out->write( replace_a ).
DATA(replace_b) = replace( val = text pcre = `\s` occ = 2 with = `#` ).
out->write( replace_b ).
"---------------- substring_* ----------------
text = `Lorem ipsum dolor sit amet`.
"Extracting a substring ...
"... after a matching regular expression
DATA(substring_after) = substring_after( val = text pcre = `\s` occ = 2 ).
out->write( substring_after ).
"... before a matching regular expression
DATA(substring_before) = substring_before( val = text pcre = `\s` occ = 2 ).
out->write( substring_before ).
"... from a matching regular expression on including the match
DATA(substring_from) = substring_from( val = text pcre = `\s` occ = 2 ).
out->write( substring_from ).
"... up to a matching regular expression including the match
DATA(substring_to) = substring_to( val = text pcre = `\s` occ = 2 ).
out->write( substring_to ).
"---------------- Predicate functions: contains, matches ----------------
"The built-in functions 'contains' returns a truth value. In the following
"examples, the truth value is demonstrated using the xsdbool function.
DATA(contains) = xsdbool( contains( val = `abc def` pcre = `\s` ) ).
out->write( contains ).
"The built-in functions 'matches' compares a search range of the textual argument
"with a regular expression.
DATA(matches) = xsdbool( matches( val = `[email protected]`
pcre = `\w+(\.\w+)*@(\w+\.)+(\w{2,4})` ) ).
out->write( matches ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `16) Built-In Functions in ABAP SQL and CDS Using Regular Expressions` ) ).
"Populating demo database tables
zcl_demo_abap_aux=>fill_dbtabs( ).
SELECT SINGLE
carrid, "LH
carrname, "Lufthansa
url, "http://www.lufthansa.com
"Checks if expression contains a PCRE expression;
"case-sensitive by default (case_sensitive parameter can be specified)
"Notes on the 1 = found, 0 = not found
"1
like_regexpr( pcre = '\..', "Period that is followed by any character
value = url ) AS like_regex,
"Searches a PCRE pattern, returns offset of match;
"many optional parameters: occurrence, case_sensitive, start, group
"21
locate_regexpr( pcre = '\..', "Period followed by any character
value = url,
occurrence = 2 ) "2nd occurrence in the string
AS locate_regexpr,
"Searches a PCRE pattern, returns offset of match + 1;
"many optional parameters: occurrence, case_sensitive, start, group
"2
locate_regexpr_after( pcre = '.', "Any character
value = url,
occurrence = 1 ) AS locate_regexpr_after,
"Counts all occurrences of found PCRE patterns
"2
occurrences_regexpr( pcre = '\..', "Period that is followed by any character
value = url ) AS occ_regex,
"Replaces a found PCRE expression;
"more parameters possible: occurrence, case_sensitive, start
"http://www#ufthansa#om
replace_regexpr( pcre = '\..', "Period that is followed by any character
value = url,
with = '#' ) AS replace_regex,
"Searches for a PCRE expression and returns the matched substring
"More parameters possible: occurrence, case_sensitive, start, group
".lu
substring_regexpr( pcre = '\...', "Period that is followed by any two characters
value = url ) AS substring_regexpr
FROM zdemo_abap_carr
WHERE carrid = 'LH'
INTO @DATA(builtin_func_regex).
out->write( builtin_func_regex ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `17) System Classes for Regular Expressions: CL_ABAP_REGEX and CL_ABAP_MATCHER` ) ).
"Note: The example results are not output except one.
"Example test_stringing
DATA(test_string) = `a1 # B2 ? cd . E3`.
"----------- Creating an instance of a regular expression -----------
"Creating an instance of a regular expression with PCRE syntax
"using cl_abap_regex
"Example pattern: Any-non digit followed by a digit
DATA(regular_expr) = cl_abap_regex=>create_pcre( pattern = `\D\d`
ignore_case = abap_true ).
"----------- Creating matchers -----------
"Two ways are possible (both ways return references of type
"ref to cl_abap_matcher):
"- create_matcher method of the cl_abap_regex class
"- create_pcre method of the cl_abap_matcher class
"Note that several importing parameters are available to enable
"further settings of the regular expression, e.g. ignoring the
"case, using the extended mode, etc. The examples pass a test_stringing
"to the 'text' parameter. You can also specify internal tables
"with the 'table' parameter and more.
"Creating a matcher using the create_matcher method of the cl_abap_regex class
DATA(matcher_1) = regular_expr->create_matcher( text = test_string ).
"Creating a matcher in one go using method chaining
DATA(matcher_2) = cl_abap_regex=>create_pcre( pattern = `\D\d`
ignore_case = abap_true
)->create_matcher( text = test_string ).
"Creating a matcher using the create_pcre method of the cl_abap_matcher class
DATA(matcher_3) = cl_abap_matcher=>create_pcre( pattern = `\D\d`
text = test_string
ignore_case = abap_true ).
"----------- Exploring searching and replacing -----------
"--- Finding all occurrences using the find_all method ---
"In the example, result has the type match_result_tab containing the findings.
DATA(result_fa1) = matcher_1->find_all( ).
DATA(result_fa2) = matcher_2->find_all( ).
ASSERT result_fa2 = result_fa1.
"Getting the result in one go using method chaining with cl_abap_matcher
DATA(result_fa3) = cl_abap_matcher=>create_pcre( pattern = `\D\d`
text = test_string
ignore_case = abap_true
)->find_all( ).
ASSERT result_fa3 = result_fa1.
"--- Example with submatches ---
test_string = `XabcdXXefgXXhXXijklmnXX`.
DATA(result_fa4) = cl_abap_matcher=>create_pcre( pattern = `X(.*?)X`
text = test_string
ignore_case = abap_true
)->find_all( ).
"--- Replacing all occurrences using the 'replace_all' method ---
DATA(matcher_repl_1) = cl_abap_regex=>create_pcre( pattern = `X(.*?)X`
)->create_matcher( text = test_string ).
DATA(repl_count_1) = matcher_repl_1->replace_all( newtext = `#$1#` ).
DATA(repl_result_1) = matcher_repl_1->text.
"Using cl_abap_matcher
DATA(matcher_repl_2) = cl_abap_matcher=>create_pcre( pattern = `X(.*?)X`
text = test_string ).
DATA(repl_count_2) = matcher_repl_2->replace_all( newtext = `#$1#` ).
DATA(repl_result_2) = matcher_repl_2->text.
"---- Sequential processing of the regular expression ---
"---- using the find_next method ------------------------
"The example explores various other methods, and writes
"information to a test_stringing table.
test_string = `a1bc2def3ghij45klm67opqr8stuvwx90yz`.
DATA(matcher_fn) = cl_abap_matcher=>create_pcre( pattern = `\d(\D.)`
text = test_string ).
DATA test_stringtab TYPE string_table.
WHILE matcher_fn->find_next( ) = abap_true.
APPEND |---- Finding { sy-index } -----| TO test_stringtab.
"Type match_result
DATA(match_result) = matcher_fn->get_match( ).
DATA(offset) = matcher_fn->get_offset( ).
DATA(length) = matcher_fn->get_length( ).
DATA(matched_content) = test_string+offset(length).
APPEND |Match offset: { offset }| TO test_stringtab.
APPEND |Match length: { length }| TO test_stringtab.
APPEND |Match content: { matched_content }| TO test_stringtab.
"Type match_result
DATA(subgroup) = matcher_fn->get_match( )-submatches.
LOOP AT subgroup INTO DATA(wa).
DATA(sub_tabix) = sy-tabix.
DATA(submatch_line) = wa.
DATA(submatch_offset) = wa-offset.
DATA(submatch_length) = wa-length.
DATA(submatch) = matcher_fn->get_submatch( sub_tabix ).
APPEND |Submatch { sub_tabix } offset: { submatch_offset }| TO test_stringtab.
APPEND |Submatch { sub_tabix } length: { submatch_length }| TO test_stringtab.
APPEND |Submatch { sub_tabix } content: { submatch }| TO test_stringtab.
ENDLOOP.
ENDWHILE.
out->write( test_stringtab ).
"---- Using an object of type cl_abap_regex in ABAP ---