-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate to Java: PredefinedPosixCharSets
- Loading branch information
1 parent
7782b8e
commit 46e94d9
Showing 8 changed files with 154 additions and 88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package dregex.impl; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
import static dregex.impl.RegexTree.CharSet$; | ||
import static dregex.impl.RegexTree.CharSet; | ||
import static dregex.impl.RegexTree.Lit; | ||
import static dregex.impl.RegexTree.CharRange; | ||
|
||
public class PredefinedPosixCharSets { | ||
|
||
public static CharSet lower = CharSet$.MODULE$.fromRange(new CharRange('a', 'z')); | ||
public static CharSet upper = CharSet$.MODULE$.fromRange(new CharRange('A', 'Z')); | ||
public static CharSet alpha = CharSet$.MODULE$.fromCharSetsJava(List.of(lower, upper)); | ||
public static CharSet digit = CharSet$.MODULE$.fromRange(new CharRange('0', '9')); | ||
public static CharSet alnum = CharSet$.MODULE$.fromCharSetsJava(List.of(alpha, digit)); | ||
public static CharSet punct = CharSet$.MODULE$.fromJava( | ||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".chars().mapToObj(ch -> new Lit(ch)).collect(Collectors.toList())); | ||
public static CharSet graph = CharSet$.MODULE$.fromCharSetsJava(List.of(alnum, punct)); | ||
public static CharSet space = CharSet$.MODULE$.fromJava(List.of(new Lit('\n'), new Lit('\t'), | ||
new Lit('\r'), new Lit('\f'), new Lit(' '), new Lit(0x0B))); | ||
public static CharSet wordChar = CharSet$.MODULE$.fromJava( | ||
Stream.concat(alnum.javaRanges().stream(), Stream.of(new Lit('_'))).collect(Collectors.toList())); | ||
|
||
public static Map<String, CharSet> classes = Map.ofEntries( | ||
Map.entry("Lower", lower), | ||
Map.entry("Upper", upper), | ||
Map.entry("ASCII", CharSet$.MODULE$.fromRange(new CharRange(0, 0x7F))), | ||
Map.entry("Alpha", alpha), | ||
Map.entry("Digit", digit), | ||
Map.entry("Alnum", alnum), | ||
Map.entry("Punct", punct), | ||
Map.entry("Graph", graph), | ||
Map.entry("Print", CharSet$.MODULE$.fromJava(Stream.concat(graph.javaRanges().stream(), | ||
Stream.of(new Lit(0x20))).collect(Collectors.toList()))), | ||
Map.entry("Blank", CharSet$.MODULE$.fromJava(List.of(new Lit(0x20), new Lit('\t')))), | ||
Map.entry("Cntrl", CharSet$.MODULE$.fromJava(List.of(new CharRange(0, 0x1F), new Lit(0x7F)))), | ||
Map.entry("XDigit", CharSet$.MODULE$.fromJava(Stream.concat(digit.javaRanges().stream(), | ||
Stream.of(new CharRange('a','f'), new CharRange('A', 'F'))).collect(Collectors.toList()))), | ||
Map.entry("Space", space)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package dregex.impl; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStreamReader; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
public class UnicodeBlocks { | ||
|
||
private static final Map<String, UnicodeDatabaseReader.Range> ranges; | ||
|
||
static { | ||
try (var blocksFile = UnicodeBlocks.class.getResourceAsStream("/Blocks.txt")) { | ||
ranges = UnicodeDatabaseReader.getBlocks(new InputStreamReader(blocksFile)); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private static final Map<String, String> synonyms = Map.of( | ||
"Greek and Coptic", "Greek" | ||
); | ||
|
||
public static final Map<String, RegexTree.CharSet> unicodeBlocks; | ||
|
||
static { | ||
unicodeBlocks = new HashMap<>(); | ||
for (var entry : ranges.entrySet()) { | ||
var block = entry.getKey(); | ||
var range = entry.getValue(); | ||
var charSet = RegexTree.CharSet$.MODULE$.fromRange(new RegexTree.CharRange(range.from, range.to)); | ||
unicodeBlocks.put(UnicodeDatabaseReader.canonicalizeBlockName(block), charSet); | ||
} | ||
for (var entry : synonyms.entrySet()) { | ||
var block = entry.getKey(); | ||
var alias = entry.getValue(); | ||
unicodeBlocks.put(UnicodeDatabaseReader.canonicalizeBlockName(alias), | ||
unicodeBlocks.get(UnicodeDatabaseReader.canonicalizeBlockName(block))); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.