Skip to content

Commit

Permalink
(feat) regex: Pattern.UNIX_LINES
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-pelykh committed Jun 25, 2024
1 parent cee7498 commit 17576b6
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 14 deletions.
2 changes: 1 addition & 1 deletion PCRE2_API.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Here's the list of the PCRE2 API functions exposed via `org.pcre4j.api.IPcre2` a
| | [pcre2_set_heap_limit](https://www.pcre.org/current/doc/html/pcre2_set_heap_limit.html) | Set the match backtracking heap limit |
| | [pcre2_set_match_limit](https://www.pcre.org/current/doc/html/pcre2_set_match_limit.html) | Set the match limit |
| | [pcre2_set_max_pattern_length](https://www.pcre.org/current/doc/html/pcre2_set_max_pattern_length.html) | Set the maximum length of pattern |
| | [pcre2_set_newline](https://www.pcre.org/current/doc/html/pcre2_set_newline.html) | Set the newline convention |
| | [pcre2_set_newline](https://www.pcre.org/current/doc/html/pcre2_set_newline.html) | Set the newline convention |
| | [pcre2_set_offset_limit](https://www.pcre.org/current/doc/html/pcre2_set_offset_limit.html) | Set the offset limit |
| | [pcre2_set_parens_nest_limit](https://www.pcre.org/current/doc/html/pcre2_set_parens_nest_limit.html) | Set the parentheses nesting limit |
| | [pcre2_set_recursion_limit](https://www.pcre.org/current/doc/html/pcre2_set_recursion_limit.html) | Obsolete: use pcre2_set_depth_limit |
Expand Down
32 changes: 32 additions & 0 deletions api/src/main/java/org/pcre4j/api/IPcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,34 @@ public interface IPcre2 {
public static final int CONVERT_GLOB_NO_WILD_SEPARATOR = 0x00000030;
public static final int CONVERT_GLOB_NO_STARSTAR = 0x00000050;

// Newline conventions. These are the integer values accepted as the {@code newline} argument of
// setNewline(long, int).

/**
 * Newline convention: carriage return only ({@code \r}).
 */
public static final int NEWLINE_CR = 1;

/**
 * Newline convention: linefeed only ({@code \n}).
 */
public static final int NEWLINE_LF = 2;

/**
 * Newline convention: CR followed by LF only ({@code \r\n}).
 */
public static final int NEWLINE_CRLF = 3;

/**
 * Newline convention: any Unicode newline sequence.
 */
public static final int NEWLINE_ANY = 4;

/**
 * Newline convention: any of {@link #NEWLINE_CR}, {@link #NEWLINE_LF}, or {@link #NEWLINE_CRLF}.
 */
public static final int NEWLINE_ANYCRLF = 5;

/**
 * Newline convention: the NUL character ({@code \0}).
 */
public static final int NEWLINE_NUL = 6;

/**
Expand Down Expand Up @@ -924,4 +947,13 @@ public interface IPcre2 {
* @param ovector the array to store the output vector
*/
public void getOvector(long matchData, long[] ovector);

/**
 * Set the newline convention within a compile context.
 * <p>
 * Exposes the native {@code pcre2_set_newline} function.
 *
 * @param ccontext the compile context handle
 * @param newline  the newline convention, e.g. {@link #NEWLINE_LF} or {@link #NEWLINE_ANY}
 * @return 0 on success, otherwise a negative error code
 */
public int setNewline(long ccontext, int newline);
}
24 changes: 24 additions & 0 deletions ffm/src/main/java/org/pcre4j/ffm/Pcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ public class Pcre2 implements IPcre2 {
private final MethodHandle pcre2_get_ovector_count;
private final MethodHandle pcre2_get_ovector_pointer;

private final MethodHandle pcre2_set_newline;

/**
* Constructs a new PCRE2 API using the common library name "pcre2-8".
*/
Expand Down Expand Up @@ -272,6 +274,14 @@ public Pcre2(String library, String suffix) {
ValueLayout.ADDRESS // pcre2_match_data*
)
);

pcre2_set_newline = LINKER.downcallHandle(
SYMBOL_LOOKUP.find("pcre2_set_newline" + suffix).orElseThrow(),
FunctionDescriptor.of(ValueLayout.JAVA_INT, // int
ValueLayout.ADDRESS, // pcre2_compile_context*
ValueLayout.JAVA_INT // int
)
);
}

@Override
Expand Down Expand Up @@ -783,4 +793,18 @@ public void getOvector(long matchData, long[] ovector) {
throw new RuntimeException(e);
}
}

/**
 * Set the newline convention within a compile context by invoking the native
 * {@code pcre2_set_newline} downcall handle.
 *
 * @param ccontext the compile context handle (raw native address)
 * @param newline  the newline convention
 * @return the integer result of the native call
 * @throws RuntimeException wrapping anything thrown while invoking the downcall handle
 */
@Override
public int setNewline(long ccontext, int newline) {
    // No Arena is opened here: the handle is only wrapped as an address and nothing is allocated,
    // so a confined arena would be created and closed on every call for no purpose.
    try {
        final var pCContext = MemorySegment.ofAddress(ccontext);

        return (int) pcre2_set_newline.invokeExact(
                pCContext,
                newline
        );
    } catch (Throwable e) {
        // MethodHandle.invokeExact is declared to throw Throwable, hence the broad catch.
        throw new RuntimeException(e);
    }
}
}
7 changes: 7 additions & 0 deletions jna/src/main/java/org/pcre4j/jna/Pcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,11 @@ public void getOvector(long matchData, long[] ovector) {
pOvector.read(0, ovector, 0, ovector.length);
}

/**
 * Set the newline convention within a compile context.
 * <p>
 * Wraps the raw compile context address in a JNA {@code Pointer} and forwards the call to the
 * mapped {@code pcre2_set_newline} library function.
 *
 * @param ccontext the compile context handle (raw native address)
 * @param value    the newline convention
 * @return the integer result of the native call
 */
@Override
public int setNewline(long ccontext, int value) {
    final Pointer pCContext = new Pointer(ccontext);
    final int result = library.pcre2_set_newline(pCContext, value);
    return result;
}

private interface Library extends com.sun.jna.Library {
int pcre2_config(int what, Pointer where);

Expand Down Expand Up @@ -397,6 +402,8 @@ int pcre2_match(
int pcre2_get_ovector_count(Pointer matchData);

Pointer pcre2_get_ovector_pointer(Pointer matchData);

int pcre2_set_newline(Pointer ccontext, int value);
}

private record SuffixFunctionMapper(String suffix) implements FunctionMapper {
Expand Down
16 changes: 16 additions & 0 deletions lib/src/main/java/org/pcre4j/Pcre2CompileContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ public long handle() {
return handle;
}

/**
 * Set the newline convention
 *
 * @param newline the newline convention
 * @throws IllegalArgumentException if {@code newline} is {@code null}
 * @throws RuntimeException         if the API call returns a non-zero result; the cause is an
 *                                  {@link IllegalStateException} carrying the error message resolved
 *                                  for that result
 */
public void setNewline(Pcre2Newline newline) {
    if (newline == null) {
        throw new IllegalArgumentException("newline cannot be null");
    }

    final var result = api.setNewline(handle, newline.value());
    if (result != 0) {
        // Translate the numeric error code into a readable message before surfacing the failure.
        final var errorMessage = Pcre4jUtils.getErrorMessage(api, result);
        throw new RuntimeException(
                "Failed to set the newline convention",
                new IllegalStateException(errorMessage)
        );
    }
}

private record Clean(IPcre2 api, long compileContext) implements Runnable {
@Override
public void run() {
Expand Down
35 changes: 29 additions & 6 deletions lib/src/main/java/org/pcre4j/Pcre2Newline.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,35 @@
import java.util.Optional;

public enum Pcre2Newline {
NEWLINE_CR(IPcre2.NEWLINE_CR),
NEWLINE_LF(IPcre2.NEWLINE_LF),
NEWLINE_CRLF(IPcre2.NEWLINE_CRLF),
NEWLINE_ANY(IPcre2.NEWLINE_ANY),
NEWLINE_ANYCRLF(IPcre2.NEWLINE_ANYCRLF),
NEWLINE_NUL(IPcre2.NEWLINE_NUL);
/**
* Carriage return only (\r)
*/
CR(IPcre2.NEWLINE_CR),

/**
* Linefeed only (\n)
*/
LF(IPcre2.NEWLINE_LF),

/**
* CR followed by LF only (\r\n)
*/
CRLF(IPcre2.NEWLINE_CRLF),

/**
* Any Unicode newline sequence
*/
ANY(IPcre2.NEWLINE_ANY),

/**
* Any of {@link #CR}, {@link #LF}, or {@link #CRLF}
*/
ANYCRLF(IPcre2.NEWLINE_ANYCRLF),

/**
* NUL character (\0)
*/
NUL(IPcre2.NEWLINE_NUL);

/**
* The integer value
Expand Down
21 changes: 16 additions & 5 deletions regex/src/main/java/org/pcre4j/regex/Pattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,14 @@ public class Pattern {
*/
public static final int UNICODE_CHARACTER_CLASS = java.util.regex.Pattern.UNICODE_CHARACTER_CLASS;

/**
* A {@link java.util.regex.Pattern#UNIX_LINES}-compatible flag implemented via {@link org.pcre4j.Pcre2Newline#LF}
*/
public static final int UNIX_LINES = java.util.regex.Pattern.UNIX_LINES;

// TODO: public static final int CANON_EQ = java.util.regex.Pattern.CANON_EQ;
// TODO: public static final int COMMENTS = java.util.regex.Pattern.COMMENTS;
// TODO: public static final int UNICODE_CASE = java.util.regex.Pattern.UNICODE_CASE;
// TODO: public static final int UNIX_LINES = java.util.regex.Pattern.UNIX_LINES;
/* package-private */ final Pcre2Code code;
/* package-private */ final Pcre2Code matchingCode;
/* package-private */ final Pcre2Code lookingAtCode;
Expand Down Expand Up @@ -108,14 +112,21 @@ private Pattern(IPcre2 api, String regex, int flags) {
compileOptions.add(Pcre2CompileOption.UCP);
}

final var compileContext = new Pcre2CompileContext(api, null);
if ((flags & UNIX_LINES) != 0) {
compileContext.setNewline(Pcre2Newline.LF);
} else {
compileContext.setNewline(Pcre2Newline.ANY);
}

try {
if (Pcre4jUtils.isJitSupported(api)) {
this.code = new Pcre2JitCode(
api,
regex,
compileOptions,
EnumSet.of(Pcre2JitOption.COMPLETE),
null
compileContext
);

final var matchingCompileOptions = EnumSet.copyOf(compileOptions);
Expand All @@ -126,7 +137,7 @@ private Pattern(IPcre2 api, String regex, int flags) {
regex,
matchingCompileOptions,
EnumSet.of(Pcre2JitOption.COMPLETE),
null
compileContext
);

final var lookingAtCompileOptions = EnumSet.copyOf(compileOptions);
Expand All @@ -136,14 +147,14 @@ private Pattern(IPcre2 api, String regex, int flags) {
regex,
lookingAtCompileOptions,
EnumSet.of(Pcre2JitOption.COMPLETE),
null
compileContext
);
} else {
this.code = new Pcre2Code(
api,
regex,
compileOptions,
null
compileContext
);
this.matchingCode = null;
this.lookingAtCode = null;
Expand Down
42 changes: 40 additions & 2 deletions regex/src/test/java/org/pcre4j/regex/PatternTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@

import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.*;

/**
* Tests to ensure API likeness of the {@link Pattern} to the {@link java.util.regex.Pattern}.
Expand Down Expand Up @@ -105,4 +104,43 @@ void withUnicodeCharacterClass(IPcre2 api) {
assertEquals(javaMatcher.group(), pcre4jMatcher.group());
}

/**
 * Without {@code UNIX_LINES}: a MULTILINE {@code ^A$} search over "A", U+0085 (NEXT LINE), "B" must give
 * the same {@code find()} result and the same matched group as {@link java.util.regex.Pattern}.
 */
@ParameterizedTest
@MethodSource("parameters")
void withoutUnixNewline(IPcre2 api) {
    final String expression = "^A$";
    final String subject = "A\u0085B";

    // Reference behaviour from the JDK engine
    final java.util.regex.Pattern jdkPattern = java.util.regex.Pattern.compile(
            expression,
            java.util.regex.Pattern.MULTILINE
    );
    final java.util.regex.Matcher expected = jdkPattern.matcher(subject);

    // Behaviour under test
    final var pcre4jPattern = Pattern.compile(api, expression, Pattern.MULTILINE);
    final var actual = pcre4jPattern.matcher(subject);

    assertEquals(expected.find(), actual.find());
    assertEquals(expected.group(), actual.group());
}

/**
 * With {@code UNIX_LINES}: a MULTILINE {@code ^A$} search over "A", U+0085 (NEXT LINE), "B" must give the
 * same {@code find()} result as {@link java.util.regex.Pattern}, and {@code group()} must throw
 * {@link IllegalStateException} on both matchers.
 */
@ParameterizedTest
@MethodSource("parameters")
void withUnixNewline(IPcre2 api) {
    final String expression = "^A$";
    final String subject = "A\u0085B";

    // Reference behaviour from the JDK engine
    final java.util.regex.Pattern jdkPattern = java.util.regex.Pattern.compile(
            expression,
            java.util.regex.Pattern.MULTILINE | java.util.regex.Pattern.UNIX_LINES
    );
    final java.util.regex.Matcher expected = jdkPattern.matcher(subject);

    // Behaviour under test
    final var pcre4jPattern = Pattern.compile(api, expression, Pattern.MULTILINE | Pattern.UNIX_LINES);
    final var actual = pcre4jPattern.matcher(subject);

    assertEquals(expected.find(), actual.find());
    assertThrows(IllegalStateException.class, () -> expected.group());
    assertThrows(IllegalStateException.class, () -> actual.group());
}

}

0 comments on commit 17576b6

Please sign in to comment.