diff --git a/.editorconfig b/.editorconfig index 50f5d2f..299d8bf 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,3 +9,9 @@ end_of_line = lf trim_trailing_whitespace = true insert_final_newline = true charset = utf-8 + +[*.json] +indent_size = 2 + +[*.{yml,yaml}] +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1eb2b59 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +# Gradle Wrapper script for Linux should use lf +/gradlew text eol=lf + +# Gradle Wrapper script for Windows should use crlf +/gradlew.bat text eol=crlf diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml new file mode 100644 index 0000000..e7bb485 --- /dev/null +++ b/.github/workflows/pull-request.yaml @@ -0,0 +1,57 @@ +name: Pull Request + +on: + pull_request: + branches: + - main + +jobs: + build: + strategy: + matrix: + os: [ ubuntu-latest ] + java-distribution: + - temurin + - zulu + - adopt-hotspot + - adopt-openj9 + - liberica + - microsoft + - corretto + - semeru + - oracle + - dragonwell + fail-fast: false + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up "${{ matrix.java-distribution }}" JDK 21 + uses: actions/setup-java@v4 + with: + distribution: ${{ matrix.java-distribution }} + java-version: 21 + + - name: Cache Gradle packages + uses: actions/cache@v4 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: gradle-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: gradle-${{ runner.os }} + + - name: Install PCRE (Ubuntu) + if: ${{ matrix.os == 'ubuntu-latest' }} + run: sudo apt-get install -y libpcre2-8-0 + + - name: Build with Gradle (Ubuntu) + if: ${{ matrix.os == 'ubuntu-latest' }} + run: ./gradlew build -Dpcre2.library.path=/usr/lib/x86_64-linux-gnu + + - name: Run tests (Ubuntu) + if: ${{ matrix.os == 'ubuntu-latest' }} + run: ./gradlew test 
-Dpcre2.library.path=/usr/lib/x86_64-linux-gnu diff --git a/.gitignore b/.gitignore index dcb084b..6ae35f6 100644 --- a/.gitignore +++ b/.gitignore @@ -181,3 +181,25 @@ fabric.properties # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* replay_pid* +## Origin: https://github.com/github/gitignore/blob/main/Gradle.gitignore +.gradle +**/build/ +!src/**/build/ + +# Ignore Gradle GUI config +gradle-app.setting + +# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) +!gradle-wrapper.jar + +# Avoid ignoring Gradle wrapper properties +!gradle-wrapper.properties + +# Cache of project +.gradletasknamecache + +# Eclipse Gradle plugin generated files +# Eclipse Core +.project +# JDT-specific (Eclipse Java Development Tools) +.classpath diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/checkstyle-idea.xml b/.idea/checkstyle-idea.xml new file mode 100644 index 0000000..75470a0 --- /dev/null +++ b/.idea/checkstyle-idea.xml @@ -0,0 +1,15 @@ + + + + 10.17.0 + JavaOnly + + + \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..b86273d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..db0b433 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,29 @@ + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..fdc392f --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 
0000000..976db11 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..4f81299 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.configuration.updateBuildConfiguration": "automatic" +} diff --git a/README.md b/README.md index a87b1b4..00bf726 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,62 @@ -# PCRE4J: [PCRE](https://www.pcre.org) for Java +# PCRE4J: PCRE for Java -The PCRE4J project is a Java wrapper for the [Perl Compatible Regular Expressions](https://github.com/PCRE2Project/pcre2) library. +The PCRE4J project's goal is to bring the power of the [PCRE](https://www.pcre.org) library to Java. + +This project is brought to you by [Alexey Pelykh](https://github.com/alexey-pelykh) with great gratitude to the PCRE +library author [Philip Hazel](https://github.com/PhilipHazel) and its contributors. 
+ +## Usage + +```java +import org.pcre4j.Pcre2Code; +import org.pcre4j.Pcre2CompileOption; +import org.pcre4j.Pcre2MatchData; +import org.pcre4j.Pcre2MatchOption; +import org.pcre4j.Pcre4j; +import org.pcre4j.Pcre4jUtils; +// TODO: Select one of the following imports for the backend you want to use: +import org.pcre4j.jna.Pcre2; +// import org.pcre4j.ffm.Pcre2; + +public class Usage { + static { + Pcre4j.setup(new Pcre2()); + } + + public static String[] example(String pattern, String subject) { + final var code = new Pcre2Code( + pattern, + EnumSet.noneOf(Pcre2CompileOption.class), + null + ); + final var matchData = new Pcre2MatchData(code); + code.match( + subject, + 0, + EnumSet.noneOf(Pcre2MatchOption.class), + matchData, + null + ); + return Pcre4jUtils.getMatchGroups(code, subject, matchData); + } +} +``` + +## Backends + +The PCRE4J library supports several backends to invoke the `pcre2` API. + +### `jna` + +The `jna` backend uses the [Java Native Access](https://github.com/java-native-access/jna) library to invoke the `pcre2` +shared library. For this backend to work, the `pcre2` shared library must be installed on the system and be visible to +JNA. + +### `ffm` + +The `ffm` backend uses the [Foreign Function and Memory API](https://docs.oracle.com/en/java/javase/21/core/foreign-function-and-memory-api.html) +to invoke the `pcre2` shared library. For this backend to work, the `pcre2` shared library must be installed on the +system and be visible via `java.library.path`. + +Note that `--enable-preview` must be passed to the Java compiler to enable the preview features for this backend to be +used. diff --git a/api/build.gradle.kts b/api/build.gradle.kts new file mode 100644 index 0000000..5312e60 --- /dev/null +++ b/api/build.gradle.kts @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +plugins { + `java-library` + `maven-publish` +} + +repositories { + mavenCentral() +} + +dependencies { + testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") + testRuntimeOnly("org.junit.platform:junit-platform-launcher") +} + +configurations { + implementation { + resolutionStrategy.failOnVersionConflict() + } +} + +sourceSets { + main { + java.srcDir("src/main/java") + } +} + +java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 + + toolchain { + languageVersion = JavaLanguageVersion.of(21) + } + + withSourcesJar() + withJavadocJar() +} + +tasks.withType { + useJUnitPlatform() +} + +tasks.named("sourcesJar") { + duplicatesStrategy = DuplicatesStrategy.INCLUDE +} + +publishing { + publications { + create("mavenJava") { + from(components["java"]) + + artifact(tasks.named("sourcesJar")) + artifact(tasks.named("javadocJar")) + + groupId = "org.pcre4j" + artifactId = project.name + version = findProperty("pcre4j.version") as String? ?: "0.0.0-SNAPSHOT" + } + } + + repositories { + mavenCentral { + credentials { + username = findProperty("pcre4j.mavenCentral.user") as String? ?: "" + password = findProperty("pcre4j.mavenCentral.password") as String? 
?: "" + } + } + } +} diff --git a/api/src/main/java/org/pcre4j/api/IPcre2.java b/api/src/main/java/org/pcre4j/api/IPcre2.java new file mode 100644 index 0000000..6969e9c --- /dev/null +++ b/api/src/main/java/org/pcre4j/api/IPcre2.java @@ -0,0 +1,862 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j.api; + +import java.nio.ByteBuffer; + +/** + * Interface for the PCRE2 API. + * + * @see PCRE2 API + */ +public interface IPcre2 { + + /** + * Force pattern anchoring + */ + public static final int ANCHORED = 0x80000000; + + /** + * Do not check the pattern for UTF validity (only relevant if UTF is set) + */ + public static final int NO_UTF_CHECK = 0x40000000; + + /** + * Pattern can match only at end of subject + */ + public static final int ENDANCHORED = 0x20000000; + + /** + * Allow empty classes + */ + public static final int ALLOW_EMPTY_CLASS = 0x00000001; + + /** + * Alternative handling of ⧵u, ⧵U, and ⧵x + */ + public static final int ALT_BSUX = 0x00000002; + + /** + * Compile automatic callouts + */ + public static final int AUTO_CALLOUT = 0x00000004; + + /** + * Do caseless matching + */ + public static final int CASELESS = 0x00000008; + + /** + * $ not to match newline at end + */ + public static final int DOLLAR_ENDONLY = 0x00000010; + + /** + * . 
matches anything including NL + */ + public static final int DOTALL = 0x00000020; + + /** + * Allow duplicate names for subpatterns + */ + public static final int DUPNAMES = 0x00000040; + + /** + * Ignore white space and # comments + */ + public static final int EXTENDED = 0x00000080; + + /** + * Force matching to be before newline + */ + public static final int FIRSTLINE = 0x00000100; + + /** + * Match unset backreferences + */ + public static final int MATCH_UNSET_BACKREF = 0x00000200; + + /** + * ^ and $ match newlines within data + */ + public static final int MULTILINE = 0x00000400; + + /** + * Lock out PCRE2_UCP, e.g. via (*UCP) + */ + public static final int NEVER_UCP = 0x00000800; + + /** + * Lock out PCRE2_UTF, e.g. via (*UTF) + */ + public static final int NEVER_UTF = 0x00001000; + + /** + * Disable numbered capturing parentheses (named ones available) + */ + public static final int NO_AUTO_CAPTURE = 0x00002000; + + /** + * Disable auto-possessification + */ + public static final int NO_AUTO_POSSESS = 0x00004000; + + /** + * Disable automatic anchoring for .* + */ + public static final int NO_DOTSTAR_ANCHOR = 0x00008000; + + /** + * Disable match-time start optimizations + */ + public static final int NO_START_OPTIMIZE = 0x00010000; + + /** + * Use Unicode properties for \d, \w, etc. 
+ */ + public static final int UCP = 0x00020000; + + /** + * Invert greediness of quantifiers + */ + public static final int UNGREEDY = 0x00040000; + + /** + * Treat pattern and subjects as UTF strings + */ + public static final int UTF = 0x00080000; + + /** + * Lock out the use of \C in patterns + */ + public static final int NEVER_BACKSLASH_C = 0x00100000; + + /** + * Alternative handling of ^ in multiline mode + */ + public static final int ALT_CIRCUMFLEX = 0x00200000; + + /** + * Process backslashes in verb names + */ + public static final int ALT_VERBNAMES = 0x00400000; + + /** + * Enable offset limit for unanchored matching + */ + public static final int USE_OFFSET_LIMIT = 0x00800000; + + public static final int EXTENDED_MORE = 0x01000000; + + /** + * Pattern characters are all literal + */ + public static final int LITERAL = 0x02000000; + + /** + * Enable support for matching invalid UTF + */ + public static final int MATCH_INVALID_UTF = 0x04000000; + + public static final int EXTRA_ALLOW_SURROGATE_ESCAPES = 0x00000001; + public static final int EXTRA_BAD_ESCAPE_IS_LITERAL = 0x00000002; + public static final int EXTRA_MATCH_WORD = 0x00000004; + public static final int EXTRA_MATCH_LINE = 0x00000008; + public static final int EXTRA_ESCAPED_CR_IS_LF = 0x00000010; + public static final int EXTRA_ALT_BSUX = 0x00000020; + public static final int EXTRA_ALLOW_LOOKAROUND_BSK = 0x00000040; + public static final int EXTRA_CASELESS_RESTRICT = 0x00000080; + public static final int EXTRA_ASCII_BSD = 0x00000100; + public static final int EXTRA_ASCII_BSS = 0x00000200; + public static final int EXTRA_ASCII_BSW = 0x00000400; + public static final int EXTRA_ASCII_POSIX = 0x00000800; + public static final int EXTRA_ASCII_DIGIT = 0x00001000; + + /** + * Compile code for full matching + */ + public static final int JIT_COMPLETE = 0x00000001; + + /** + * Compile code for soft partial matching + */ + public static final int JIT_PARTIAL_SOFT = 0x00000002; + + /** + * Compile code 
for hard partial matching + */ + public static final int JIT_PARTIAL_HARD = 0x00000004; + + /** + * @deprecated Use {@link #MATCH_INVALID_UTF} + */ + @Deprecated + public static final int JIT_INVALID_UTF = 0x00000100; + + /** + * Subject string is not the beginning of a line + */ + public static final int NOTBOL = 0x00000001; + + /** + * Subject string is not the end of a line + */ + public static final int NOTEOL = 0x00000002; + + /** + * An empty string is not a valid match + */ + public static final int NOTEMPTY = 0x00000004; + + /** + * An empty string at the start of the subject is not a valid match + */ + public static final int NOTEMPTY_ATSTART = 0x00000008; + + /** + * Return {@link IPcre2#ERROR_PARTIAL} for a partial match if no full matches are found + */ + public static final int PARTIAL_SOFT = 0x00000010; + + /** + * Return {@link IPcre2#ERROR_PARTIAL} for a partial match even if there is a full match + */ + public static final int PARTIAL_HARD = 0x00000020; + public static final int DFA_RESTART = 0x00000040; + public static final int DFA_SHORTEST = 0x00000080; + public static final int SUBSTITUTE_GLOBAL = 0x00000100; + public static final int SUBSTITUTE_EXTENDED = 0x00000200; + public static final int SUBSTITUTE_UNSET_EMPTY = 0x00000400; + public static final int SUBSTITUTE_UNKNOWN_UNSET = 0x00000800; + public static final int SUBSTITUTE_OVERFLOW_LENGTH = 0x00001000; + + /** + * Do not use JIT matching + */ + public static final int NO_JIT = 0x00002000; + + /** + * On success, make a private subject copy + */ + public static final int COPY_MATCHED_SUBJECT = 0x00004000; + + public static final int SUBSTITUTE_LITERAL = 0x00008000; + public static final int SUBSTITUTE_MATCHED = 0x00010000; + public static final int SUBSTITUTE_REPLACEMENT_ONLY = 0x00020000; + public static final int DISABLE_RECURSELOOP_CHECK = 0x00040000; + + public static final int CONVERT_UTF = 0x00000001; + public static final int CONVERT_NO_UTF_CHECK = 0x00000002; + public static final 
int CONVERT_POSIX_BASIC = 0x00000004; + public static final int CONVERT_POSIX_EXTENDED = 0x00000008; + public static final int CONVERT_GLOB = 0x00000010; + public static final int CONVERT_GLOB_NO_WILD_SEPARATOR = 0x00000030; + public static final int CONVERT_GLOB_NO_STARSTAR = 0x00000050; + + public static final int NEWLINE_CR = 1; + public static final int NEWLINE_LF = 2; + public static final int NEWLINE_CRLF = 3; + public static final int NEWLINE_ANY = 4; + public static final int NEWLINE_ANYCRLF = 5; + public static final int NEWLINE_NUL = 6; + + /** + * \R corresponds to the Unicode line endings + */ + public static final int BSR_UNICODE = 1; + + /** + * \R corresponds to CR, LF, and CRLF only + */ + public static final int BSR_ANYCRLF = 2; + + public static final int ERROR_END_BACKSLASH = 101; + public static final int ERROR_END_BACKSLASH_C = 102; + public static final int ERROR_UNKNOWN_ESCAPE = 103; + public static final int ERROR_QUANTIFIER_OUT_OF_ORDER = 104; + public static final int ERROR_QUANTIFIER_TOO_BIG = 105; + public static final int ERROR_MISSING_SQUARE_BRACKET = 106; + public static final int ERROR_ESCAPE_INVALID_IN_CLASS = 107; + public static final int ERROR_CLASS_RANGE_ORDER = 108; + public static final int ERROR_QUANTIFIER_INVALID = 109; + public static final int ERROR_INTERNAL_UNEXPECTED_REPEAT = 110; + public static final int ERROR_INVALID_AFTER_PARENS_QUERY = 111; + public static final int ERROR_POSIX_CLASS_NOT_IN_CLASS = 112; + public static final int ERROR_POSIX_NO_SUPPORT_COLLATING = 113; + public static final int ERROR_MISSING_CLOSING_PARENTHESIS = 114; + public static final int ERROR_BAD_SUBPATTERN_REFERENCE = 115; + public static final int ERROR_NULL_PATTERN = 116; + public static final int ERROR_BAD_OPTIONS = 117; + public static final int ERROR_MISSING_COMMENT_CLOSING = 118; + public static final int ERROR_PARENTHESES_NEST_TOO_DEEP = 119; + public static final int ERROR_PATTERN_TOO_LARGE = 120; + public static final int 
ERROR_HEAP_FAILED = 121; + public static final int ERROR_UNMATCHED_CLOSING_PARENTHESIS = 122; + public static final int ERROR_INTERNAL_CODE_OVERFLOW = 123; + public static final int ERROR_MISSING_CONDITION_CLOSING = 124; + public static final int ERROR_LOOKBEHIND_NOT_FIXED_LENGTH = 125; + public static final int ERROR_ZERO_RELATIVE_REFERENCE = 126; + public static final int ERROR_TOO_MANY_CONDITION_BRANCHES = 127; + public static final int ERROR_CONDITION_ASSERTION_EXPECTED = 128; + public static final int ERROR_BAD_RELATIVE_REFERENCE = 129; + public static final int ERROR_UNKNOWN_POSIX_CLASS = 130; + public static final int ERROR_INTERNAL_STUDY_ERROR = 131; + public static final int ERROR_UNICODE_NOT_SUPPORTED = 132; + public static final int ERROR_PARENTHESES_STACK_CHECK = 133; + public static final int ERROR_CODE_POINT_TOO_BIG = 134; + public static final int ERROR_LOOKBEHIND_TOO_COMPLICATED = 135; + public static final int ERROR_LOOKBEHIND_INVALID_BACKSLASH_C = 136; + public static final int ERROR_UNSUPPORTED_ESCAPE_SEQUENCE = 137; + public static final int ERROR_CALLOUT_NUMBER_TOO_BIG = 138; + public static final int ERROR_MISSING_CALLOUT_CLOSING = 139; + public static final int ERROR_ESCAPE_INVALID_IN_VERB = 140; + public static final int ERROR_UNRECOGNIZED_AFTER_QUERY_P = 141; + public static final int ERROR_MISSING_NAME_TERMINATOR = 142; + public static final int ERROR_DUPLICATE_SUBPATTERN_NAME = 143; + public static final int ERROR_INVALID_SUBPATTERN_NAME = 144; + public static final int ERROR_UNICODE_PROPERTIES_UNAVAILABLE = 145; + public static final int ERROR_MALFORMED_UNICODE_PROPERTY = 146; + public static final int ERROR_UNKNOWN_UNICODE_PROPERTY = 147; + public static final int ERROR_SUBPATTERN_NAME_TOO_LONG = 148; + public static final int ERROR_TOO_MANY_NAMED_SUBPATTERNS = 149; + public static final int ERROR_CLASS_INVALID_RANGE = 150; + public static final int ERROR_OCTAL_BYTE_TOO_BIG = 151; + public static final int 
ERROR_INTERNAL_OVERRAN_WORKSPACE = 152; + public static final int ERROR_INTERNAL_MISSING_SUBPATTERN = 153; + public static final int ERROR_DEFINE_TOO_MANY_BRANCHES = 154; + public static final int ERROR_BACKSLASH_O_MISSING_BRACE = 155; + public static final int ERROR_INTERNAL_UNKNOWN_NEWLINE = 156; + public static final int ERROR_BACKSLASH_G_SYNTAX = 157; + public static final int ERROR_PARENS_QUERY_R_MISSING_CLOSING = 158; + @Deprecated + public static final int ERROR_VERB_ARGUMENT_NOT_ALLOWED = 159; + public static final int ERROR_VERB_UNKNOWN = 160; + public static final int ERROR_SUBPATTERN_NUMBER_TOO_BIG = 161; + public static final int ERROR_SUBPATTERN_NAME_EXPECTED = 162; + public static final int ERROR_INTERNAL_PARSED_OVERFLOW = 163; + public static final int ERROR_INVALID_OCTAL = 164; + public static final int ERROR_SUBPATTERN_NAMES_MISMATCH = 165; + public static final int ERROR_MARK_MISSING_ARGUMENT = 166; + public static final int ERROR_INVALID_HEXADECIMAL = 167; + public static final int ERROR_BACKSLASH_C_SYNTAX = 168; + public static final int ERROR_BACKSLASH_K_SYNTAX = 169; + public static final int ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS = 170; + public static final int ERROR_BACKSLASH_N_IN_CLASS = 171; + public static final int ERROR_CALLOUT_STRING_TOO_LONG = 172; + public static final int ERROR_UNICODE_DISALLOWED_CODE_POINT = 173; + public static final int ERROR_UTF_IS_DISABLED = 174; + public static final int ERROR_UCP_IS_DISABLED = 175; + public static final int ERROR_VERB_NAME_TOO_LONG = 176; + public static final int ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG = 177; + public static final int ERROR_MISSING_OCTAL_OR_HEX_DIGITS = 178; + public static final int ERROR_VERSION_CONDITION_SYNTAX = 179; + public static final int ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS = 180; + public static final int ERROR_CALLOUT_NO_STRING_DELIMITER = 181; + public static final int ERROR_CALLOUT_BAD_STRING_DELIMITER = 182; + public static final int 
ERROR_BACKSLASH_C_CALLER_DISABLED = 183; + public static final int ERROR_QUERY_BARJX_NEST_TOO_DEEP = 184; + public static final int ERROR_BACKSLASH_C_LIBRARY_DISABLED = 185; + public static final int ERROR_PATTERN_TOO_COMPLICATED = 186; + public static final int ERROR_LOOKBEHIND_TOO_LONG = 187; + public static final int ERROR_PATTERN_STRING_TOO_LONG = 188; + public static final int ERROR_INTERNAL_BAD_CODE = 189; + public static final int ERROR_INTERNAL_BAD_CODE_IN_SKIP = 190; + public static final int ERROR_NO_SURROGATES_IN_UTF16 = 191; + public static final int ERROR_BAD_LITERAL_OPTIONS = 192; + public static final int ERROR_SUPPORTED_ONLY_IN_UNICODE = 193; + public static final int ERROR_INVALID_HYPHEN_IN_OPTIONS = 194; + public static final int ERROR_ALPHA_ASSERTION_UNKNOWN = 195; + public static final int ERROR_SCRIPT_RUN_NOT_AVAILABLE = 196; + public static final int ERROR_TOO_MANY_CAPTURES = 197; + public static final int ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED = 198; + public static final int ERROR_BACKSLASH_K_IN_LOOKAROUND = 199; + + public static final int ERROR_NOMATCH = -1; + public static final int ERROR_PARTIAL = -2; + public static final int ERROR_UTF8_ERR1 = -3; + public static final int ERROR_UTF8_ERR2 = -4; + public static final int ERROR_UTF8_ERR3 = -5; + public static final int ERROR_UTF8_ERR4 = -6; + public static final int ERROR_UTF8_ERR5 = -7; + public static final int ERROR_UTF8_ERR6 = -8; + public static final int ERROR_UTF8_ERR7 = -9; + public static final int ERROR_UTF8_ERR8 = -10; + public static final int ERROR_UTF8_ERR9 = -11; + public static final int ERROR_UTF8_ERR10 = -12; + public static final int ERROR_UTF8_ERR11 = -13; + public static final int ERROR_UTF8_ERR12 = -14; + public static final int ERROR_UTF8_ERR13 = -15; + public static final int ERROR_UTF8_ERR14 = -16; + public static final int ERROR_UTF8_ERR15 = -17; + public static final int ERROR_UTF8_ERR16 = -18; + public static final int ERROR_UTF8_ERR17 = -19; + public static 
final int ERROR_UTF8_ERR18 = -20; + public static final int ERROR_UTF8_ERR19 = -21; + public static final int ERROR_UTF8_ERR20 = -22; + public static final int ERROR_UTF8_ERR21 = -23; + public static final int ERROR_UTF16_ERR1 = -24; + public static final int ERROR_UTF16_ERR2 = -25; + public static final int ERROR_UTF16_ERR3 = -26; + public static final int ERROR_UTF32_ERR1 = -27; + public static final int ERROR_UTF32_ERR2 = -28; + public static final int ERROR_BADDATA = -29; + public static final int ERROR_MIXEDTABLES = -30; + public static final int ERROR_BADMAGIC = -31; + public static final int ERROR_BADMODE = -32; + public static final int ERROR_BADOFFSET = -33; + public static final int ERROR_BADOPTION = -34; + public static final int ERROR_BADREPLACEMENT = -35; + public static final int ERROR_BADUTFOFFSET = -36; + public static final int ERROR_CALLOUT = -37; + public static final int ERROR_DFA_BADRESTART = -38; + public static final int ERROR_DFA_RECURSE = -39; + public static final int ERROR_DFA_UCOND = -40; + public static final int ERROR_DFA_UFUNC = -41; + public static final int ERROR_DFA_UITEM = -42; + public static final int ERROR_DFA_WSSIZE = -43; + public static final int ERROR_INTERNAL = -44; + public static final int ERROR_JIT_BADOPTION = -45; + public static final int ERROR_JIT_STACKLIMIT = -46; + public static final int ERROR_MATCHLIMIT = -47; + public static final int ERROR_NOMEMORY = -48; + public static final int ERROR_NOSUBSTRING = -49; + public static final int ERROR_NOUNIQUESUBSTRING = -50; + public static final int ERROR_NULL = -51; + public static final int ERROR_RECURSELOOP = -52; + public static final int ERROR_DEPTHLIMIT = -53; + @Deprecated + public static final int ERROR_RECURSIONLIMIT = -53; + public static final int ERROR_UNAVAILABLE = -54; + public static final int ERROR_UNSET = -55; + public static final int ERROR_BADOFFSETLIMIT = -56; + public static final int ERROR_BADREPESCAPE = -57; + public static final int 
ERROR_REPMISSINGBRACE = -58; + public static final int ERROR_BADSUBSTITUTION = -59; + public static final int ERROR_BADSUBSPATTERN = -60; + public static final int ERROR_TOOMANYREPLACE = -61; + public static final int ERROR_BADSERIALIZEDDATA = -62; + public static final int ERROR_HEAPLIMIT = -63; + public static final int ERROR_CONVERT_SYNTAX = -64; + public static final int ERROR_INTERNAL_DUPMATCH = -65; + public static final int ERROR_DFA_UINVALID_UTF = -66; + public static final int ERROR_INVALIDOFFSET = -67; + + /** + * Final options after compiling + */ + public static final int INFO_ALLOPTIONS = 0; + + /** + * Options passed to {@link #compile(String, int, int[], long[], long)} + */ + public static final int INFO_ARGOPTIONS = 1; + + /** + * Number of highest backreference + */ + public static final int INFO_BACKREFMAX = 2; + + /** + * What \R matches: + * PCRE2_BSR_UNICODE: Unicode line endings + * PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only + */ + public static final int INFO_BSR = 3; + + /** + * Number of capturing subpatterns + */ + public static final int INFO_CAPTURECOUNT = 4; + + /** + * First code unit when type is 1 + */ + public static final int INFO_FIRSTCODEUNIT = 5; + + /** + * Type of start-of-match information + * 0 nothing set + * 1 first code unit is set + * 2 start of string or after newline + */ + public static final int INFO_FIRSTCODETYPE = 6; + + /** + * Bitmap of first code units, or 0 + */ + public static final int INFO_FIRSTBITMAP = 7; + + /** + * Return 1 if explicit CR or LF matches exist in the pattern + */ + public static final int INFO_HASCRORLF = 8; + + /** + * Return 1 if (?J) or (?-J) was used + */ + public static final int INFO_JCHANGED = 9; + + /** + * Size of JIT compiled code, or 0 + */ + public static final int INFO_JITSIZE = 10; + + /** + * Last code unit when type is 1 + */ + public static final int INFO_LASTCODEUNIT = 11; + + /** + * Type of must-be-present information + * 0 nothing set + * 1 code unit is set + */ + public 
static final int INFO_LASTCODETYPE = 12; + + /** + * 1 if the pattern can match an empty string, 0 otherwise + */ + public static final int INFO_MATCHEMPTY = 13; + + /** + * Match limit if set, otherwise {@link #ERROR_UNSET} + */ + public static final int INFO_MATCHLIMIT = 14; + + /** + * Length (in characters) of the longest lookbehind assertion + */ + public static final int INFO_MAXLOOKBEHIND = 15; + + /** + * Lower bound length of matching strings + */ + public static final int INFO_MINLENGTH = 16; + + /** + * Number of named subpatterns + */ + public static final int INFO_NAMECOUNT = 17; + + /** + * Size of name table entries + */ + public static final int INFO_NAMEENTRYSIZE = 18; + + /** + * Pointer to name table + */ + public static final int INFO_NAMETABLE = 19; + + /** + * Code for the newline sequence: + * {@link #NEWLINE_CR} + * {@link #NEWLINE_LF} + * {@link #NEWLINE_CRLF} + * {@link #NEWLINE_ANY} + * {@link #NEWLINE_ANYCRLF} + * {@link #NEWLINE_NUL} + */ + public static final int INFO_NEWLINE = 20; + + /** + * Backtracking depth limit if set, otherwise {@link #ERROR_UNSET} + */ + public static final int INFO_DEPTHLIMIT = 21; + + /** + * Obsolete synonym for {@link #INFO_DEPTHLIMIT} + */ + @Deprecated + public static final int INFO_RECURSIONLIMIT = 21; + + /** + * Size of compiled pattern + */ + public static final int INFO_SIZE = 22; + + /** + * Return 1 if pattern contains \C + */ + public static final int INFO_HASBACKSLASHC = 23; + + /** + * Size of backtracking frame + */ + public static final int INFO_FRAMESIZE = 24; + + /** + * Heap memory limit if set, otherwise {@link #ERROR_UNSET} + */ + public static final int INFO_HEAPLIMIT = 25; + + /** + * Extra options that were passed in the compile context + */ + public static final int INFO_EXTRAOPTIONS = 26; + + public static final int CONFIG_BSR = 0; + public static final int CONFIG_JIT = 1; + public static final int CONFIG_JITTARGET = 2; + public static final int CONFIG_LINKSIZE = 3; + public static 
final int CONFIG_MATCHLIMIT = 4; + public static final int CONFIG_NEWLINE = 5; + public static final int CONFIG_PARENSLIMIT = 6; + public static final int CONFIG_DEPTHLIMIT = 7; + @Deprecated + public static final int CONFIG_RECURSIONLIMIT = 7; + @Deprecated + public static final int CONFIG_STACKRECURSE = 8; + public static final int CONFIG_UNICODE = 9; + public static final int CONFIG_UNICODE_VERSION = 10; + public static final int CONFIG_VERSION = 11; + public static final int CONFIG_HEAPLIMIT = 12; + public static final int CONFIG_NEVER_BACKSLASH_C = 13; + public static final int CONFIG_COMPILED_WIDTHS = 14; + public static final int CONFIG_TABLES_LENGTH = 15; + + /** + * Create a new general context. + * + * @param privateMalloc the private malloc function or 0 to use the system function + * @param privateFree the private free function or 0 to use the system function + * @param memoryData the memory data to pass to the private malloc and free functions + * @return the general context handle + */ + public long generalContextCreate(long privateMalloc, long privateFree, long memoryData); + + /** + * Create a copy of a general context. + * + * @param gcontext the general context handle to copy + * @return the new general context handle + */ + public long generalContextCopy(long gcontext); + + /** + * Free a general context. + * + * @param gcontext the general context handle + */ + public void generalContextFree(long gcontext); + + /** + * Create a new compile context. + * + * @param gcontext the general context handle or 0 + * @return the compile context handle + */ + public long compileContextCreate(long gcontext); + + /** + * Create a copy of a compile context. + * + * @param ccontext the compile context handle to copy + * @return the new compile context handle + */ + public long compileContextCopy(long ccontext); + + /** + * Free a compile context. 
+ * + * @param ccontext the compile context handle + */ + public void compileContextFree(long ccontext); + + /** + * Compile a regular expression pattern. + * + * @param pattern the pattern to compile + * @param options a combination of the compile options + * @param errorcode an array to store the error code + * @param erroroffset an array to store the error offset + * @param ccontext a compile context handle or 0 + * @return a compiled pattern handle + * @see pcre2_compile + */ + public long compile(String pattern, int options, int[] errorcode, long[] erroroffset, long ccontext); + + /** + * Free a compiled pattern resources. + * + * @param code the compiled pattern handle + * @see pcre2_code_free + */ + public void codeFree(long code); + + /** + * Get the error message for the given error code. + * + * @param errorcode the error code + * @param buffer the buffer to store the error message + * @return the length of the error message or {@link #ERROR_NOMEMORY} if the buffer is too small and if + * {@code errorcode} is not a valid error code, {@link #ERROR_BADDATA} is returned + * @see pcre2_get_error_message + */ + public int getErrorMessage(int errorcode, ByteBuffer buffer); + + /** + * Retrieve size of the information about a compiled pattern. + * + * @param code the compiled pattern handle + * @param what the information to retrieve + * @return Size of the information, otherwise an error code: + * {@link #ERROR_NULL} the argument code is 0 + * {@link #ERROR_BADMAGIC} the "magic number" was not found + * {@link #ERROR_BADOPTION} the value of {@code what} is invalid + * {@link #ERROR_BADMODE} the pattern was compiled in the wrong mode + * {@link #ERROR_UNSET} the requested information is not set + */ + public int patternInfo(long code, int what); + + /** + * Retrieve the information about a compiled pattern as Integer. + *

+ * Suitable for any information except: + * {@link #INFO_FIRSTBITMAP} + * {@link #INFO_NAMETABLE} + * May be suitable for the following information: + * {@link #INFO_JITSIZE} + * {@link #INFO_SIZE} + * {@link #INFO_FRAMESIZE} + * + * @param code the compiled pattern handle + * @param what the information to retrieve + * @param where the array to store the information + * @return Zero on success, otherwise an error code: + * {@link #ERROR_NULL} the argument code is 0 + * {@link #ERROR_BADMAGIC} the "magic number" was not found + * {@link #ERROR_BADOPTION} the value of {@code what} is invalid + * {@link #ERROR_BADMODE} the pattern was compiled in the wrong mode + * {@link #ERROR_UNSET} the requested information is not set + */ + public int patternInfo(long code, int what, int[] where); + + /** + * Retrieve the information about a compiled pattern as Long. + *

+ * May be suitable for the following information: + * {@link #INFO_JITSIZE} + * {@link #INFO_SIZE} + * {@link #INFO_FRAMESIZE} + * + * @param code the compiled pattern handle + * @param what the information to retrieve + * @param where the array to store the information + * @return Zero on success, otherwise an error code: + * {@link #ERROR_NULL} the argument code is 0 + * {@link #ERROR_BADMAGIC} the "magic number" was not found + * {@link #ERROR_BADOPTION} the value of {@code what} is invalid + * {@link #ERROR_BADMODE} the pattern was compiled in the wrong mode + * {@link #ERROR_UNSET} the requested information is not set + */ + public int patternInfo(long code, int what, long[] where); + + /** + * Retrieve the information about a compiled pattern as byte buffer. + *

+ * Only suitable for the following information: + * {@link #INFO_NAMETABLE} + * + * @param code the compiled pattern handle + * @param what the information to retrieve + * @param where the buffer to store the information + * @return Zero on success, otherwise an error code: + * {@link #ERROR_NULL} the argument code is 0 + * {@link #ERROR_BADMAGIC} the "magic number" was not found + * {@link #ERROR_BADOPTION} the value of {@code what} is invalid + * {@link #ERROR_BADMODE} the pattern was compiled in the wrong mode + * {@link #ERROR_UNSET} the requested information is not set + */ + public int patternInfo(long code, int what, ByteBuffer where); + + /** + * Create a new match data block. + * + * @param ovecsize the size of the ovector + * @param gcontext the general context handle or 0 + * @return the match data handle + */ + public long matchDataCreate(int ovecsize, long gcontext); + + /** + * Create a new match data block from a compiled pattern. + * + * @param code the compiled pattern handle + * @param gcontext the general context handle or 0 + * @return the match data handle + */ + public long matchDataCreateFromPattern(long code, long gcontext); + + /** + * Free a match data block. + * + * @param matchData the match data handle + */ + public void matchDataFree(long matchData); + + /** + * Create a new match context. + * + * @param gcontext the general context handle or 0 + * @return the match context handle + */ + public long matchContextCreate(long gcontext); + + /** + * Create a copy of a match context. + * + * @param mcontext the match context handle to copy + * @return the new match context handle + */ + public long matchContextCopy(long mcontext); + + /** + * Free a match context. + * + * @param mcontext the match context handle + */ + public void matchContextFree(long mcontext); + + /** + * Match a compiled pattern against a subject string. 
/*
 * Copyright (C) 2024 Oleksii PELYKH
 *
 * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see
 * <https://www.gnu.org/licenses/>.
 */

// Build script of the "ffm" module: the PCRE2 backend implemented with the Foreign Function & Memory API.
plugins {
    `java-library`
    `maven-publish`
}

repositories {
    mavenCentral()
}

dependencies {
    implementation(project(":api"))
    testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
    testImplementation(project(":lib"))
    testImplementation(project(":test"))
    testRuntimeOnly("org.junit.platform:junit-platform-launcher")
}

configurations {
    implementation {
        resolutionStrategy.failOnVersionConflict()
    }
}

sourceSets {
    main {
        java.srcDir("src/main/java")
    }
}

java {
    sourceCompatibility = JavaVersion.VERSION_21
    targetCompatibility = JavaVersion.VERSION_21

    toolchain {
        languageVersion = JavaLanguageVersion.of(21)
    }

    withSourcesJar()
    withJavadocJar()
}

// java.lang.foreign is a preview API on Java 21, so compilation, tests and Javadoc all need --enable-preview.
tasks.withType<JavaCompile> {
    options.compilerArgs.add("--enable-preview")
}

tasks.withType<Test> {
    useJUnitPlatform()
    jvmArgs("--enable-preview")

    // Put the directory given with -Dpcre2.library.path=... in front of the inherited java.library.path.
    // listOfNotNull drops an unset property (it used to be rendered as the literal "null"), and
    // File.pathSeparator keeps the joined value valid on Windows (";") as well as on POSIX systems (":").
    systemProperty(
        "java.library.path",
        listOfNotNull(
            System.getProperty("pcre2.library.path"),
            System.getProperty("java.library.path")
        ).joinToString(java.io.File.pathSeparator)
    )
}

tasks.named<Jar>("sourcesJar") {
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
}

tasks.withType<Javadoc> {
    val javadocOptions = options as CoreJavadocOptions

    javadocOptions.addStringOption("source", "21")
    javadocOptions.addBooleanOption("-enable-preview", true)
}

publishing {
    publications {
        create<MavenPublication>("mavenJava") {
            from(components["java"])

            artifact(tasks.named("sourcesJar"))
            artifact(tasks.named("javadocJar"))

            groupId = "org.pcre4j"
            artifactId = project.name
            version = findProperty("pcre4j.version") as String? ?: "0.0.0-SNAPSHOT"
        }
    }

    repositories {
        mavenCentral {
            credentials {
                username = findProperty("pcre4j.mavenCentral.user") as String? ?: ""
                password = findProperty("pcre4j.mavenCentral.password") as String? ?: ""
            }
        }
    }
}
+ */ +public class Pcre2 implements IPcre2 { + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup SYMBOL_LOOKUP = SymbolLookup.loaderLookup(); + + private final MethodHandle pcre2_general_context_create; + private final MethodHandle pcre2_general_context_copy; + private final MethodHandle pcre2_general_context_free; + + private final MethodHandle pcre2_compile_context_create; + private final MethodHandle pcre2_compile_context_copy; + private final MethodHandle pcre2_compile_context_free; + + private final MethodHandle pcre2_compile; + private final MethodHandle pcre2_code_free; + + private final MethodHandle pcre2_get_error_message; + private final MethodHandle pcre2_pattern_info; + + private final MethodHandle pcre2_match_data_create; + private final MethodHandle pcre2_match_data_create_from_pattern; + private final MethodHandle pcre2_match_data_free; + + private final MethodHandle pcre2_match_context_create; + private final MethodHandle pcre2_match_context_copy; + private final MethodHandle pcre2_match_context_free; + + private final MethodHandle pcre2_match; + + private final MethodHandle pcre2_get_ovector_count; + private final MethodHandle pcre2_get_ovector_pointer; + + /** + * Constructs a new PCRE2 API using the common library name "pcre2-8". + */ + public Pcre2() { + this("pcre2-8", "_8"); + } + + /** + * Constructs a new PCRE2 API using the specified library name and function suffix. + * + * @param library the library name or path to the file (e.g. "pcre2-8" or "/usr/lib/libpcre2-8.so") + * @param suffix the function suffix (e.g. 
"_8" as in "pcre2_compile_8") + */ + public Pcre2(String library, String suffix) { + if (new File(library).exists()) { + System.load(library); + } else { + System.loadLibrary(library); + } + + pcre2_general_context_create = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_general_context_create" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_general_context* + ValueLayout.ADDRESS, // void* (*)(PCRE2_SIZE, void *) + ValueLayout.ADDRESS, // void* (*)(PCRE2_SIZE, void *) + ValueLayout.ADDRESS // void* + ) + ); + + pcre2_general_context_copy = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_general_context_copy" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_general_context* + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_general_context_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_general_context_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_compile_context_create = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_compile_context_create" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_compile_context* + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_compile_context_copy = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_compile_context_copy" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_compile_context* + ValueLayout.ADDRESS // pcre2_compile_context* + ) + ); + + pcre2_compile_context_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_compile_context_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_compile_context* + ) + ); + + pcre2_compile = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_compile" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_code* + ValueLayout.ADDRESS, // PCRE2_SPTR + ValueLayout.ADDRESS, // 
PCRE2_SIZE + ValueLayout.JAVA_INT, // uint32_t + ValueLayout.ADDRESS, // int* + ValueLayout.ADDRESS, // PCRE2_SIZE* + ValueLayout.ADDRESS // pcre2_compile_context* + ) + ); + + pcre2_code_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_code_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_code* + ) + ); + + pcre2_get_error_message = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_get_error_message" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_INT, // int + ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS, // PCRE2_UCHAR* + ValueLayout.ADDRESS // PCRE2_SIZE + ) + ); + + pcre2_pattern_info = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_pattern_info" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS, // pcre2_code* + ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS // void* + ) + ); + + pcre2_match_data_create = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_data_create" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_data* + ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_match_data_create_from_pattern = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_data_create_from_pattern" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_data* + ValueLayout.ADDRESS, // pcre2_code* + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_match_data_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_data_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_match_data* + ) + ); + + pcre2_match_context_create = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_context_create" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_context* + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + 
pcre2_match_context_copy = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_context_copy" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_context* + ValueLayout.ADDRESS // pcre2_match_context* + ) + ); + + pcre2_match_context_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match_context_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_match_context* + ) + ); + + pcre2_match = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_match" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS, // pcre2_code* + ValueLayout.ADDRESS, // PCRE2_SPTR + ValueLayout.ADDRESS, // PCRE2_SIZE + ValueLayout.ADDRESS, // PCRE2_SIZE + ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS, // pcre2_match_data* + ValueLayout.ADDRESS // pcre2_match_context* + ) + ); + + pcre2_get_ovector_count = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_get_ovector_count" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.JAVA_INT, // int + ValueLayout.ADDRESS // pcre2_match_data* + ) + ); + + pcre2_get_ovector_pointer = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_get_ovector_pointer" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // PCRE2_SIZE* + ValueLayout.ADDRESS // pcre2_match_data* + ) + ); + } + + @Override + public long generalContextCreate(long privateMalloc, long privateFree, long memoryData) { + try (var arena = Arena.ofConfined()) { + final var pPrivateMalloc = MemorySegment.ofAddress(privateMalloc); + final var pPrivateFree = MemorySegment.ofAddress(privateFree); + final var pMemoryData = MemorySegment.ofAddress(memoryData); + + final var pGContext = (MemorySegment) pcre2_general_context_create.invokeExact( + pPrivateMalloc, + pPrivateFree, + pMemoryData + ); + + return pGContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long generalContextCopy(long 
gcontext) { + try (var arena = Arena.ofConfined()) { + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pNewGContext = (MemorySegment) pcre2_general_context_copy.invokeExact( + pGContext + ); + + return pNewGContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void generalContextFree(long gcontext) { + try (var arena = Arena.ofConfined()) { + final var pGContext = MemorySegment.ofAddress(gcontext); + + pcre2_general_context_free.invokeExact( + pGContext + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long compileContextCreate(long gcontext) { + try (var arena = Arena.ofConfined()) { + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pCContext = (MemorySegment) pcre2_compile_context_create.invokeExact( + pGContext + ); + + return pCContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long compileContextCopy(long ccontext) { + try (var arena = Arena.ofConfined()) { + final var pCContext = MemorySegment.ofAddress(ccontext); + + final var pNewCContext = (MemorySegment) pcre2_compile_context_copy.invokeExact( + pCContext + ); + + return pNewCContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void compileContextFree(long ccontext) { + try (var arena = Arena.ofConfined()) { + final var pCContext = MemorySegment.ofAddress(ccontext); + + pcre2_compile_context_free.invokeExact( + pCContext + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + + @Override + public long compile(String pattern, int options, int[] errorcode, long[] erroroffset, long ccontext) { + if (errorcode == null || errorcode.length < 1) { + throw new IllegalArgumentException("errorcode must be an array of length 1"); + } + if (erroroffset == null || erroroffset.length < 1) { + throw new IllegalArgumentException("erroroffset must 
be an array of length 1"); + } + + try (var arena = Arena.ofConfined()) { + final var pszPattern = arena.allocateUtf8String(pattern); + final var patternSize = MemorySegment.ofAddress(pszPattern.byteSize() - 1); + final var pErrorCode = arena.allocateArray(ValueLayout.JAVA_INT, 1); + final var pErrorOffset = arena.allocateArray(ValueLayout.JAVA_LONG, 1); + final var pContext = MemorySegment.ofAddress(ccontext); + + final var pCode = (MemorySegment) pcre2_compile.invokeExact( + pszPattern, + patternSize, + options, + pErrorCode, + pErrorOffset, + pContext + ); + + errorcode[0] = pErrorCode.get(ValueLayout.JAVA_INT, 0); + erroroffset[0] = pErrorOffset.get(ValueLayout.JAVA_LONG, 0); + + return pCode.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void codeFree(long code) { + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + + pcre2_code_free.invokeExact( + pCode + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int getErrorMessage(int errorcode, ByteBuffer buffer) { + if (buffer == null) { + throw new IllegalArgumentException("buffer must not be null"); + } + if (!buffer.isDirect()) { + throw new IllegalArgumentException("buffer must be direct"); + } + + try (var arena = Arena.ofConfined()) { + final var pszBuffer = MemorySegment.ofBuffer(buffer); + final var bufferSize = MemorySegment.ofAddress(buffer.capacity()); + + return (int) pcre2_get_error_message.invokeExact( + errorcode, + pszBuffer, + bufferSize + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int patternInfo(long code, int what) { + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pWhere = MemorySegment.ofAddress(0); + + return (int) pcre2_pattern_info.invokeExact( + pCode, + what, + pWhere + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + 
@Override + public int patternInfo(long code, int what, int[] where) { + if (where == null) { + throw new IllegalArgumentException("where must not be null"); + } + if (where.length != 1) { + throw new IllegalArgumentException("where must be an array of length 1"); + } + + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pWhere = arena.allocateArray(ValueLayout.JAVA_INT, 1); + + final var result = (int) pcre2_pattern_info.invokeExact( + pCode, + what, + pWhere + ); + + where[0] = pWhere.get(ValueLayout.JAVA_INT, 0); + + return result; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int patternInfo(long code, int what, long[] where) { + if (where == null) { + throw new IllegalArgumentException("where must not be null"); + } + if (where.length != 1) { + throw new IllegalArgumentException("where must be an array of length 1"); + } + + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pWhere = arena.allocateArray(ValueLayout.JAVA_LONG, 1); + + final var result = (int) pcre2_pattern_info.invokeExact( + pCode, + what, + pWhere + ); + + where[0] = pWhere.get(ValueLayout.JAVA_LONG, 0); + + return result; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int patternInfo(long code, int what, ByteBuffer where) { + if (where == null) { + throw new IllegalArgumentException("where must not be null"); + } + + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pWhere = arena.allocateArray(ValueLayout.ADDRESS, 1); + + final var result = (int) pcre2_pattern_info.invokeExact( + pCode, + what, + pWhere + ); + + final var pTable = pWhere.get(ValueLayout.ADDRESS, 0).reinterpret(where.capacity()); + MemorySegment.ofBuffer(where).copyFrom(pTable); + + return result; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public 
long matchDataCreate(int ovecsize, long gcontext) { + try (var arena = Arena.ofConfined()) { + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pMatchData = (MemorySegment) pcre2_match_data_create.invokeExact( + ovecsize, + pGContext + ); + + return pMatchData.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long matchDataCreateFromPattern(long code, long gcontext) { + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pMatchData = (MemorySegment) pcre2_match_data_create_from_pattern.invokeExact( + pCode, + pGContext + ); + + return pMatchData.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void matchDataFree(long matchData) { + try (var arena = Arena.ofConfined()) { + final var pMatchData = MemorySegment.ofAddress(matchData); + + pcre2_match_data_free.invokeExact( + pMatchData + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long matchContextCreate(long gcontext) { + try (var arena = Arena.ofConfined()) { + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pMatchContext = (MemorySegment) pcre2_match_context_create.invokeExact( + pGContext + ); + + return pMatchContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public long matchContextCopy(long mcontext) { + try (var arena = Arena.ofConfined()) { + final var pMatchContext = MemorySegment.ofAddress(mcontext); + + final var pNewMatchContext = (MemorySegment) pcre2_match_context_copy.invokeExact( + pMatchContext + ); + + return pNewMatchContext.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void matchContextFree(long mcontext) { + try (var arena = Arena.ofConfined()) { + final var pMatchContext = 
MemorySegment.ofAddress(mcontext); + + pcre2_match_context_free.invokeExact( + pMatchContext + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int match(long code, String subject, int startoffset, int options, long matchData, long mcontext) { + try (var arena = Arena.ofConfined()) { + final var pCode = MemorySegment.ofAddress(code); + final var pszSubject = arena.allocateUtf8String(subject); + final var subjectLength = MemorySegment.ofAddress(pszSubject.byteSize() - 1); + final var startOffset = MemorySegment.ofAddress(startoffset); + final var pMatchData = MemorySegment.ofAddress(matchData); + final var pMatchContext = MemorySegment.ofAddress(mcontext); + + return (int) pcre2_match.invokeExact( + pCode, + pszSubject, + subjectLength, + startOffset, + options, + pMatchData, + pMatchContext + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public int getOvectorCount(long matchData) { + try (var arena = Arena.ofConfined()) { + final var pMatchData = MemorySegment.ofAddress(matchData); + + return (int) pcre2_get_ovector_count.invokeExact( + pMatchData + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void getOvector(long matchData, long[] ovector) { + try (var arena = Arena.ofConfined()) { + final var pMatchData = MemorySegment.ofAddress(matchData); + + final var pOvector = (MemorySegment) pcre2_get_ovector_pointer.invokeExact( + pMatchData + ); + + MemorySegment.ofArray(ovector).copyFrom(pOvector.reinterpret(ovector.length * 8)); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } +} diff --git a/ffm/src/test/java/org/pcre4j/ffm/Pcre2Tests.java b/ffm/src/test/java/org/pcre4j/ffm/Pcre2Tests.java new file mode 100644 index 0000000..9a09c54 --- /dev/null +++ b/ffm/src/test/java/org/pcre4j/ffm/Pcre2Tests.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j.ffm; + +import org.pcre4j.Pcre4j; + +/** Inherits the tests of org.pcre4j.test.Pcre2Tests; the static initializer passes a new FFM-based Pcre2 instance to Pcre4j.setup when this class is initialized. */ +public class Pcre2Tests extends org.pcre4j.test.Pcre2Tests { + static { + Pcre4j.setup(new Pcre2()); + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..e644113 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..a441313 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..b740cf1 --- /dev/null +++ b/gradlew @@ -0,0 +1,249 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. 
+# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..25da30d --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. 
+@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/jna/build.gradle.kts b/jna/build.gradle.kts new file mode 100644 index 0000000..0d7c61f --- /dev/null +++ b/jna/build.gradle.kts @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +plugins { + `java-library` + `maven-publish` +} + +repositories { + mavenCentral() +} + +dependencies { + implementation(project(":api")) + implementation("net.java.dev.jna:jna-platform:5.14.0") + testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") + testImplementation(project(":lib")) + testImplementation(project(":test")) + testRuntimeOnly("org.junit.platform:junit-platform-launcher") +} + +configurations { + implementation { + resolutionStrategy.failOnVersionConflict() + } +} + +sourceSets { + main { + java.srcDir("src/main/java") + } +} + +java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 + + toolchain { + languageVersion = JavaLanguageVersion.of(21) + } + + withSourcesJar() + withJavadocJar() +} + +tasks.withType { + useJUnitPlatform() +} + +tasks.named("sourcesJar") { + duplicatesStrategy = DuplicatesStrategy.INCLUDE +} + +publishing { + publications { + create("mavenJava") { + from(components["java"]) + + artifact(tasks.named("sourcesJar")) + artifact(tasks.named("javadocJar")) + + groupId = "org.pcre4j" + artifactId = project.name + version = findProperty("pcre4j.version") as String? ?: "0.0.0-SNAPSHOT" + } + } + + repositories { + mavenCentral { + credentials { + username = findProperty("pcre4j.mavenCentral.user") as String? ?: "" + password = findProperty("pcre4j.mavenCentral.password") as String? ?: "" + } + } + } +} diff --git a/jna/src/main/java/org/pcre4j/jna/Pcre2.java b/jna/src/main/java/org/pcre4j/jna/Pcre2.java new file mode 100644 index 0000000..b16a98b --- /dev/null +++ b/jna/src/main/java/org/pcre4j/jna/Pcre2.java @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j.jna; + +import com.sun.jna.FunctionMapper; +import com.sun.jna.Native; +import com.sun.jna.NativeLibrary; +import com.sun.jna.Pointer; +import com.sun.jna.ptr.IntByReference; +import com.sun.jna.ptr.LongByReference; +import org.pcre4j.api.IPcre2; + +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.util.Map; + +/** + * A PCRE2 API using the JNA. + */ +public class Pcre2 implements IPcre2 { + + /** + * The PCRE2 library loaded by JNA. + */ + private final Library library; + + /** + * Constructs a new PCRE2 API using the common library name "pcre2-8". + */ + public Pcre2() { + this("pcre2-8", "_8"); + } + + /** + * Constructs a new PCRE2 API using the specified library name and function suffix. + * + * @param libraryName the library name + * (e.g. "pcre2-8" for "pcre2-8.dll" on Windows, "libpcre2-8.so" on Linux, "libpcre2-8.dylib" on macOS) + * @param suffix the function suffix (e.g.
"_8" as in "pcre2_compile_8") + */ + public Pcre2(String libraryName, String suffix) { + this.library = Native.load( + libraryName, + Library.class, + Map.of(Library.OPTION_FUNCTION_MAPPER, new SuffixFunctionMapper(suffix)) + ); + } + + @Override + public long generalContextCreate(long privateMalloc, long privateFree, long memoryData) { + Pointer gContext = library.pcre2_general_context_create( + new Pointer(privateMalloc), + new Pointer(privateFree), + new Pointer(memoryData) + ); + return Pointer.nativeValue(gContext); + } + + @Override + public long generalContextCopy(long gcontext) { + Pointer newGContext = library.pcre2_general_context_copy(new Pointer(gcontext)); + return Pointer.nativeValue(newGContext); + } + + @Override + public void generalContextFree(long gcontext) { + library.pcre2_general_context_free(new Pointer(gcontext)); + } + + @Override + public long compileContextCreate(long gcontext) { + Pointer cContext = library.pcre2_compile_context_create(new Pointer(gcontext)); + return Pointer.nativeValue(cContext); + } + + @Override + public long compileContextCopy(long ccontext) { + Pointer newCContext = library.pcre2_compile_context_copy(new Pointer(ccontext)); + return Pointer.nativeValue(newCContext); + } + + @Override + public void compileContextFree(long ccontext) { + library.pcre2_compile_context_free(new Pointer(ccontext)); + } + + @Override + public long compile(String pattern, int options, int[] errorcode, long[] erroroffset, long ccontext) { + IntByReference errorCodeRef = new IntByReference(); + LongByReference errorOffsetRef = new LongByReference(); + + Pointer code = library.pcre2_compile( + pattern, + Native.toByteArray(pattern).length - 1, + options, + errorCodeRef, + errorOffsetRef, + new Pointer(ccontext) + ); + + errorcode[0] = errorCodeRef.getValue(); + erroroffset[0] = errorOffsetRef.getValue(); + + return Pointer.nativeValue(code); + } + + @Override + public void codeFree(long code) { + library.pcre2_code_free(new Pointer(code)); + } + + @Override +
public int getErrorMessage(int errorcode, ByteBuffer buffer) { + Pointer pszBuffer = Native.getDirectBufferPointer(buffer); + return library.pcre2_get_error_message(errorcode, pszBuffer, buffer.capacity()); + } + + @Override + public int patternInfo(long code, int what) { + return library.pcre2_pattern_info(new Pointer(code), what, Pointer.NULL); + } + + @Override + public int patternInfo(long code, int what, int[] where) { + IntByReference whereRef = new IntByReference(); + int result = library.pcre2_pattern_info(new Pointer(code), what, whereRef.getPointer()); + where[0] = whereRef.getValue(); + return result; + } + + @Override + public int patternInfo(long code, int what, long[] where) { + LongByReference whereRef = new LongByReference(); + int result = library.pcre2_pattern_info(new Pointer(code), what, whereRef.getPointer()); + where[0] = whereRef.getValue(); + return result; + } + + @Override + public int patternInfo(long code, int what, ByteBuffer where) { + Pointer wherePtr = Native.getDirectBufferPointer(where); + return library.pcre2_pattern_info(new Pointer(code), what, wherePtr); + } + + @Override + public long matchDataCreate(int ovecsize, long gcontext) { + Pointer matchData = library.pcre2_match_data_create(ovecsize, new Pointer(gcontext)); + return Pointer.nativeValue(matchData); + } + + @Override + public long matchDataCreateFromPattern(long code, long gcontext) { + Pointer matchData = library.pcre2_match_data_create_from_pattern(new Pointer(code), new Pointer(gcontext)); + return Pointer.nativeValue(matchData); + } + + @Override + public void matchDataFree(long matchData) { + library.pcre2_match_data_free(new Pointer(matchData)); + } + + @Override + public long matchContextCreate(long gcontext) { + Pointer matchContext = library.pcre2_match_context_create(new Pointer(gcontext)); + return Pointer.nativeValue(matchContext); + } + + @Override + public long matchContextCopy(long mcontext) { + Pointer newMatchContext = 
library.pcre2_match_context_copy(new Pointer(mcontext)); + return Pointer.nativeValue(newMatchContext); + } + + @Override + public void matchContextFree(long mcontext) { + library.pcre2_match_context_free(new Pointer(mcontext)); + } + + @Override + public int match(long code, String subject, int startoffset, int options, long matchData, long mcontext) { + return library.pcre2_match( + new Pointer(code), + subject, + Native.toByteArray(subject).length - 1, + startoffset, + options, + new Pointer(matchData), + new Pointer(mcontext) + ); + } + + @Override + public int getOvectorCount(long matchData) { + return library.pcre2_get_ovector_count(new Pointer(matchData)); + } + + @Override + public void getOvector(long matchData, long[] ovector) { + Pointer pOvector = library.pcre2_get_ovector_pointer(new Pointer(matchData)); + pOvector.read(0, ovector, 0, ovector.length); + } + + private interface Library extends com.sun.jna.Library { + Pointer pcre2_general_context_create(Pointer malloc, Pointer free, Pointer memoryData); + + Pointer pcre2_general_context_copy(Pointer gcontext); + + void pcre2_general_context_free(Pointer gcontext); + + Pointer pcre2_compile_context_create(Pointer gcontext); + + Pointer pcre2_compile_context_copy(Pointer ccontext); + + void pcre2_compile_context_free(Pointer ccontext); + + Pointer pcre2_compile(String pattern, long patternLength, int options, IntByReference errorcode, LongByReference erroroffset, Pointer ccontext); + + void pcre2_code_free(Pointer code); + + int pcre2_get_error_message(int errorcode, Pointer buffer, long bufferSize); + + int pcre2_pattern_info(Pointer code, int what, Pointer where); + + Pointer pcre2_match_data_create(int ovecsize, Pointer gcontext); + + Pointer pcre2_match_data_create_from_pattern(Pointer code, Pointer gcontext); + + void pcre2_match_data_free(Pointer matchData); + + Pointer pcre2_match_context_create(Pointer gcontext); + + Pointer pcre2_match_context_copy(Pointer mcontext); + + void pcre2_match_context_free(Pointer
mcontext); + + int pcre2_match(Pointer code, String subject, long length, long startOffset, int options, Pointer matchData, Pointer mcontext); + + int pcre2_get_ovector_count(Pointer matchData); + + Pointer pcre2_get_ovector_pointer(Pointer matchData); + } + + private record SuffixFunctionMapper(String suffix) implements FunctionMapper { + @Override + public String getFunctionName(NativeLibrary nativeLibrary, Method method) { + return method.getName() + suffix; + } + } +} diff --git a/jna/src/test/java/org/pcre4j/jna/Pcre2Tests.java b/jna/src/test/java/org/pcre4j/jna/Pcre2Tests.java new file mode 100644 index 0000000..ee525d1 --- /dev/null +++ b/jna/src/test/java/org/pcre4j/jna/Pcre2Tests.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j.jna; + +import org.pcre4j.Pcre4j; + +public class Pcre2Tests extends org.pcre4j.test.Pcre2Tests { + static { + Pcre4j.setup(new Pcre2()); + } +} diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts new file mode 100644 index 0000000..7074000 --- /dev/null +++ b/lib/build.gradle.kts @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +plugins { + `java-library` + `maven-publish` +} + +repositories { + mavenCentral() +} + +dependencies { + implementation(project(":api")) + testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") + testRuntimeOnly("org.junit.platform:junit-platform-launcher") +} + +configurations { + implementation { + resolutionStrategy.failOnVersionConflict() + } +} + +sourceSets { + main { + java.srcDir("src/main/java") + } +} + +java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 + + toolchain { + languageVersion = JavaLanguageVersion.of(21) + } + + withSourcesJar() + withJavadocJar() +} + +tasks.withType { + useJUnitPlatform() +} + +tasks.named("sourcesJar") { + duplicatesStrategy = DuplicatesStrategy.INCLUDE +} + +publishing { + publications { + create("mavenJava") { + from(components["java"]) + + artifact(tasks.named("sourcesJar")) + artifact(tasks.named("javadocJar")) + + groupId = "org.pcre4j" + artifactId = project.name + version = findProperty("pcre4j.version") as String? ?: "0.0.0-SNAPSHOT" + } + } + + repositories { + mavenCentral { + credentials { + username = findProperty("pcre4j.mavenCentral.user") as String? ?: "" + password = findProperty("pcre4j.mavenCentral.password") as String? 
?: "" + } + } + } +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2Bsr.java b/lib/src/main/java/org/pcre4j/Pcre2Bsr.java new file mode 100644 index 0000000..072203e --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2Bsr.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +/** + * The \R processing option. + */ +public enum Pcre2Bsr { + /** + * \R corresponds to the Unicode line endings + */ + UNICODE(IPcre2.BSR_UNICODE), + + /** + * \R corresponds to CR, LF, and CRLF only + */ + ANYCRLF(IPcre2.BSR_ANYCRLF); + + /** + * The integer value + */ + private final int value; + + /** + * Create an enum entry with the given integer value. + * + * @param value the integer value + */ + private Pcre2Bsr(int value) { + this.value = value; + } + + /** + * Get the enum entry by its integer value. + * + * @param value the integer value + * @return the enum entry + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(entry -> entry.value == value) + .findFirst(); + } + + /** + * Get the integer value. 
+ * + * @return the integer value + */ + public int value() { + return value; + } +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2Code.java b/lib/src/main/java/org/pcre4j/Pcre2Code.java new file mode 100644 index 0000000..53e090a --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2Code.java @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.EnumSet; + +public class Pcre2Code { + + private static final Cleaner cleaner = Cleaner.create(); + /** + * The compiled pattern handle + */ + /* package-private */ final long handle; + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + private final IPcre2 api; + /** + * The cleaner to free the compiled pattern + */ + private final Cleaner.Cleanable cleanable; + + /** + * Constructor for Pcre2Code + * + * @param pattern the pattern to compile + * @param options the flags to compile the pattern with, see {@link Pcre2CompileOption} + * @param compileContext the compile context to use or null + */ + public Pcre2Code(String pattern, EnumSet options, Pcre2CompileContext compileContext) { + final var api = Pcre4j.api(); + + final var errorcode = new int[1]; + final var erroroffset = new long[1]; + final var handle = api.compile( + pattern, + options + .stream() + .mapToInt(Pcre2CompileOption::value) + .sum(), + errorcode, + erroroffset, + compileContext != null ? 
compileContext.handle : 0 + ); + if (handle == 0) { + throw new Pcre2CompileError(pattern, erroroffset[0], Pcre4jUtils.getErrorMessage(api, errorcode[0])); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Clean(api, handle)); + } + + /** + * Get the pattern information that is an integer + * + * @param info the information to get + * @return the information integer value + */ + private int getPatternIntInfo(int info) { + final var size = api.patternInfo(handle, info); + if (size != 4) { + throw new Pcre2PatternInfoSizeError(Pcre2PatternInfo.valueOf(info).orElseThrow(), size); + } + + final var where = new int[1]; + final var error = api.patternInfo(handle, info, where); + if (error != 0) { + throw new IllegalStateException(Pcre4jUtils.getErrorMessage(api, error)); + } + + return where[0]; + } + + /** + * Get the pattern size information as long + * + * @param info the information to get + * @return the size information as long + */ + private long getPatternSizeInfo(int info) { + final var infoSize = api.patternInfo(handle, IPcre2.INFO_FRAMESIZE); + + if (infoSize == 4) { + final var where = new int[1]; + final var error = api.patternInfo(handle, IPcre2.INFO_FRAMESIZE, where); + if (error != 0) { + throw new IllegalStateException(Pcre4jUtils.getErrorMessage(api, error)); + } + + return where[0]; + } else if (infoSize == 8) { + final var where = new long[1]; + final var error = api.patternInfo(handle, IPcre2.INFO_FRAMESIZE, where); + if (error != 0) { + throw new IllegalStateException(Pcre4jUtils.getErrorMessage(api, error)); + } + + return where[0]; + } + + throw new Pcre2PatternInfoSizeError(Pcre2PatternInfo.valueOf(IPcre2.INFO_FRAMESIZE).orElseThrow(), infoSize); + } + + /** + * Get the number of highest backreference + * + * @return the number of highest backreference + */ + public int backRefMax() { + return getPatternIntInfo(IPcre2.INFO_BACKREFMAX); + } + +// TODO: PCRE2_INFO_ALLOPTIONS Final options after compiling 
+ + /** + * Get the compile options + * + * @return the compile options + */ + public EnumSet argOptions() { + final var argOptions = getPatternIntInfo(IPcre2.INFO_ARGOPTIONS); + return Arrays.stream(Pcre2CompileOption.values()) + .filter(flag -> (argOptions & flag.value()) != 0) + .collect(() -> EnumSet.noneOf(Pcre2CompileOption.class), EnumSet::add, EnumSet::addAll); + } + + /** + * Get the number of capturing subpatterns + * + * @return the number of capturing subpatterns + */ + public int captureCount() { + return getPatternIntInfo(IPcre2.INFO_CAPTURECOUNT); + } + + /** + * Get what \R matches: + * {@link Pcre2Bsr#UNICODE} for Unicode line endings + * {@link Pcre2Bsr#ANYCRLF} for CR, LF, or CRLF only + * + * @return what \R matches + */ + public Pcre2Bsr bsr() { + final var bsr = getPatternIntInfo(IPcre2.INFO_BSR); + return Pcre2Bsr.valueOf(bsr).orElseThrow(); + } + + /** + * Get the backtracking depth limit + * + * @return the backtracking depth limit + */ + public int depthLimit() { + final var depthLimit = getPatternIntInfo(IPcre2.INFO_DEPTHLIMIT); + if (depthLimit == IPcre2.ERROR_UNSET) { + throw new IllegalStateException("Depth limit is not set"); + } + return depthLimit; + } + + /** + * Get the size of backtracking frame + * + * @return the size of backtracking frame + */ + public long frameSize() { + return getPatternSizeInfo(IPcre2.INFO_FRAMESIZE); + } + +// TODO: PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the compile context +// TODO: PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL +// TODO: PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information +// 0 nothing set +// 1 first code unit is set +// 2 start of string or after newline +// TODO: PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 + + /** + * Check if the pattern contains \C + * + * @return true if the pattern contains \C, false otherwise + */ + public boolean hasBackslashC() { + final var hasBackslashC = getPatternIntInfo(IPcre2.INFO_HASBACKSLASHC); + 
return hasBackslashC == 1; + } + + /** + * Check if explicit CR or LF matches exist in the pattern + * + * @return true if explicit CR or LF matches exist in the pattern, false otherwise + */ + public boolean hasCrOrLf() { + final var hasCrOrLf = getPatternIntInfo(IPcre2.INFO_HASCRORLF); + return hasCrOrLf == 1; + } + + /** + * Get the heap limit + * + * @return the heap limit + */ + public int heapLimit() { + final var heapLimit = getPatternIntInfo(IPcre2.INFO_HEAPLIMIT); + if (heapLimit == IPcre2.ERROR_UNSET) { + throw new IllegalStateException("Heap limit is not set"); + } + return heapLimit; + } + + /** + * Check if the pattern uses (?J) or (?-J) + * + * @return true if the pattern uses (?J) or (?-J), false otherwise + */ + public boolean jChanged() { + final var jChanged = getPatternIntInfo(IPcre2.INFO_JCHANGED); + return jChanged == 1; + } + + /** + * Get the size of JIT compiled code, or 0 + * + * @return the size of JIT compiled code, or 0 + */ + public long jitSize() { + return getPatternSizeInfo(IPcre2.INFO_JITSIZE); + } + + /** + * Check if the pattern can match an empty string + * + * @return true if the pattern can match an empty string, false otherwise + */ + public boolean matchEmpty() { + final var matchEmpty = getPatternIntInfo(IPcre2.INFO_MATCHEMPTY); + return matchEmpty == 1; + } + +// TODO:PCRE2_INFO_LASTCODETYPE Type of must-be-present information +// 0 nothing set +// 1 code unit is set +// TODO:PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 + + /** + * Get the match limit + * + * @return the match limit + */ + public int matchLimit() { + final var matchLimit = getPatternIntInfo(IPcre2.INFO_DEPTHLIMIT); + if (matchLimit == IPcre2.ERROR_UNSET) { + throw new IllegalStateException("Match limit is not set"); + } + return matchLimit; + } + + /** + * Get the length (in characters) of the longest lookbehind assertion + * + * @return the length (in characters) of the longest lookbehind assertion + */ + public int maxLookBehind() { + return 
getPatternIntInfo(IPcre2.INFO_MAXLOOKBEHIND); + } + + /** + * Get the lower bound length of matching strings + * + * @return the lower bound length of matching strings + */ + public int minLength() { + return getPatternIntInfo(IPcre2.INFO_MINLENGTH); + } + + /** + * Get the number of named subpatterns + * + * @return the number of named subpatterns + */ + public int nameCount() { + return getPatternIntInfo(IPcre2.INFO_NAMECOUNT); + } + + /** + * Get the newline sequence + * + * @return the newline sequence + */ + public Pcre2Newline newline() { + final var newline = getPatternIntInfo(IPcre2.INFO_NEWLINE); + return Pcre2Newline.valueOf(newline).orElseThrow(); + } + + /** + * Get the size of name table entries + * + * @return the size of name table entries + */ + public int nameEntrySize() { + return getPatternIntInfo(IPcre2.INFO_NAMEENTRYSIZE); + } + + /** + * Get the name table + * + * @return the name table + */ + public NameTableEntry[] nameTable() { + final var nameCount = nameCount(); + final var nameEntrySize = nameEntrySize(); + final var where = ByteBuffer.allocate(nameCount * nameEntrySize); + final var error = api.patternInfo(handle, IPcre2.INFO_NAMETABLE, where); + if (error != 0) { + throw new IllegalStateException(Pcre4jUtils.getErrorMessage(api, error)); + } + + final var nameTable = new NameTableEntry[nameCount]; + for (var nameIndex = 0; nameIndex < nameCount; nameIndex++) { + final var offset = nameIndex * nameEntrySize; + final var groupIndex = where.slice(offset, 2).getShort(); + final var groupNameUtf8 = where.slice(offset + 2, nameEntrySize - 2); + while (groupNameUtf8.remaining() > 0) { + if (groupNameUtf8.get() == 0) { + groupNameUtf8.limit(groupNameUtf8.position() - 1); + groupNameUtf8.position(0); + break; + } + } + final var groupName = StandardCharsets.UTF_8.decode(groupNameUtf8).toString(); + nameTable[nameIndex] = new NameTableEntry(groupIndex, groupName); + } + return nameTable; + } + + /** + * Get the size of the compiled pattern + * + 
* @return the size of the compiled pattern + */ + public long size() { + return getPatternSizeInfo(IPcre2.INFO_SIZE); + } + + /** + * Match this compiled pattern against a given subject string. + * + * @param subject the subject string to match this pattern against + * @param startoffset offset in the subject at which to start matching + * @param options the options, see {@link Pcre2MatchOption} + * @param matchData the match data to store the results in + * @param matchContext the match context to use or null + * @return the number of captures plus one, zero if the {@code matchData} is too small, or a negative value if there + * was no match or an actual error occurred + */ + public int match( + String subject, + int startoffset, + EnumSet options, + Pcre2MatchData matchData, + Pcre2MatchContext matchContext + ) { + if (subject == null) { + throw new IllegalArgumentException("Subject cannot be null"); + } + if (matchData == null) { + throw new IllegalArgumentException("Match data cannot be null"); + } + + return api.match( + handle, + subject, + startoffset, + options + .stream() + .mapToInt(Pcre2MatchOption::value) + .sum(), + matchData.handle, + matchContext != null ? matchContext.handle : 0 + ); + } + + /** + * A name table entry + * + * @param group the group + * @param name the name + */ + public record NameTableEntry(int group, String name) { + } + + private record Clean(IPcre2 api, long code) implements Runnable { + @Override + public void run() { + api.codeFree(code); + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java b/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java new file mode 100644 index 0000000..f54f3d9 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; + +public class Pcre2CompileContext { + + private static final Cleaner cleaner = Cleaner.create(); + /** + * The compile context handle + */ + /* package-private */ final long handle; + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + private final IPcre2 api; + /** + * The cleaner to free the resources + */ + private final Cleaner.Cleanable cleanable; + + /** + * Create a new compile context + * + * @param generalContext the general context to use or {@code null} to use the default context + */ + public Pcre2CompileContext(Pcre2GeneralContext generalContext) { + final var api = Pcre4j.api(); + + final var handle = api.compileContextCreate( + generalContext != null ? 
/**
 * An error raised when a pattern fails to compile.
 * <p>
 * The exception message contains the error offset, a short excerpt of the pattern around that offset and the error
 * message supplied by the caller.
 */
public class Pcre2CompileError extends IllegalArgumentException {

    /**
     * The size of the region around the error to show
     */
    private static final int PATTERN_REGION_SIZE = 3;

    /**
     * Create a new pattern compilation error.
     *
     * @param pattern the pattern
     * @param offset  the offset of the error in the pattern
     * @param message the error message
     */
    public Pcre2CompileError(String pattern, long offset, String message) {
        this(pattern, offset, message, null);
    }

    /**
     * Create a new pattern compilation error.
     *
     * @param pattern the pattern
     * @param offset  the offset of the error in the pattern
     * @param message the error message
     * @param cause   the cause of the error
     */
    public Pcre2CompileError(String pattern, long offset, String message, Throwable cause) {
        super("Error in pattern at %d (%s): %s".formatted(offset, getPatternRegion(pattern, offset), message), cause);
    }

    /**
     * Get the region around the error in the pattern.
     * <p>
     * The region bounds are clamped to the pattern, so an offset that lies outside of it (or a {@code null}
     * pattern) never makes the construction of the error itself fail.
     *
     * @param pattern the pattern
     * @param offset  the offset of the error in the pattern
     * @return the region around the error
     */
    private static String getPatternRegion(String pattern, long offset) {
        if (pattern == null) {
            return "";
        }

        final int length = pattern.length();
        // Clamp both bounds into [0, length] and keep since <= until so that substring() cannot throw
        final int since = (int) Math.min((long) length, Math.max(0L, offset - PATTERN_REGION_SIZE));
        final int until = (int) Math.max((long) since, Math.min((long) length, offset + PATTERN_REGION_SIZE));

        var region = pattern.substring(since, until);
        if (since > 0) {
            region = "…" + region;
        }
        if (until < length) {
            region = region + "…";
        }

        return region;
    }

}
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +public enum Pcre2CompileOption { + /** + * Force pattern anchoring + */ + ANCHORED(IPcre2.ANCHORED), + + /** + * Do not check the pattern for UTF validity (only relevant if UTF is set) + */ + NO_UTF_CHECK(IPcre2.NO_UTF_CHECK), + + /** + * Pattern can match only at end of subject + */ + ENDANCHORED(IPcre2.ENDANCHORED), + + /** + * Allow empty classes + */ + ALLOW_EMPTY_CLASS(IPcre2.ALLOW_EMPTY_CLASS), + + /** + * Alternative handling of ⧵u, ⧵U, and ⧵x + */ + ALT_BSUX(IPcre2.ALT_BSUX), + + /** + * Compile automatic callouts + */ + AUTO_CALLOUT(IPcre2.AUTO_CALLOUT), + + /** + * Do caseless matching + */ + CASELESS(IPcre2.CASELESS), + + /** + * $ not to match newline at end + */ + DOLLAR_ENDONLY(IPcre2.DOLLAR_ENDONLY), + + /** + * . matches anything including NL + */ + DOTALL(IPcre2.DOTALL), + + /** + * Allow duplicate names for subpatterns + */ + DUPNAMES(IPcre2.DUPNAMES), + + /** + * Ignore white space and # comments + */ + EXTENDED(IPcre2.EXTENDED), + + /** + * Force matching to be before newline + */ + FIRSTLINE(IPcre2.FIRSTLINE), + + /** + * Match unset backreferences + */ + MATCH_UNSET_BACKREF(IPcre2.MATCH_UNSET_BACKREF), + + /** + * ^ and $ match newlines within data + */ + MULTILINE(IPcre2.MULTILINE), + + /** + * Lock out PCRE2_UCP, e.g. via (*UCP) + */ + NEVER_UCP(IPcre2.NEVER_UCP), + + /** + * Lock out PCRE2_UTF, e.g. via (*UTF) + */ + NEVER_UTF(IPcre2.NEVER_UTF), + + /** + * Disable numbered capturing parentheses (named ones available) + */ + NO_AUTO_CAPTURE(IPcre2.NO_AUTO_CAPTURE), + + /** + * Disable auto-possessification + */ + NO_AUTO_POSSESS(IPcre2.NO_AUTO_POSSESS), + + /** + * Disable automatic anchoring for .* + */ + NO_DOTSTAR_ANCHOR(IPcre2.NO_DOTSTAR_ANCHOR), + + /** + * Disable match-time start optimizations + */ + NO_START_OPTIMIZE(IPcre2.NO_START_OPTIMIZE), + + /** + * Use Unicode properties for \d, \w, etc. 
+ */ + UCP(IPcre2.UCP), + + /** + * Invert greediness of quantifiers + */ + UNGREEDY(IPcre2.UNGREEDY), + + /** + * Treat pattern and subjects as UTF strings + */ + UTF(IPcre2.UTF), + + /** + * Lock out the use of \C in patterns + */ + NEVER_BACKSLASH_C(IPcre2.NEVER_BACKSLASH_C), + + /** + * Alternative handling of ^ in multiline mode + */ + ALT_CIRCUMFLEX(IPcre2.ALT_CIRCUMFLEX), + + /** + * Process backslashes in verb names + */ + ALT_VERBNAMES(IPcre2.ALT_VERBNAMES), + + /** + * Enable offset limit for unanchored matching + */ + USE_OFFSET_LIMIT(IPcre2.USE_OFFSET_LIMIT), + + EXTENDED_MORE(IPcre2.EXTENDED_MORE), + + /** + * Pattern characters are all literal + */ + LITERAL(IPcre2.LITERAL), + + /** + * Enable support for matching invalid UTF + */ + MATCH_INVALID_UTF(IPcre2.MATCH_INVALID_UTF); + + /** + * The integer value of the option + */ + private final int value; + + /** + * Create a new enum value for the given option value. + * + * @param value the integer value of the option + */ + private Pcre2CompileOption(int value) { + this.value = value; + } + + /** + * Get the enum value by its option value. + * + * @param value the integer value of the option + * @return the flag + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(flag -> flag.value == value) + .findFirst(); + } + + /** + * Get the option value of the enum value. + * + * @return the integer value of the option + */ + public int value() { + return value; + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2GeneralContext.java b/lib/src/main/java/org/pcre4j/Pcre2GeneralContext.java new file mode 100644 index 0000000..1341468 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2GeneralContext.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; + +public class Pcre2GeneralContext { + + private static final Cleaner cleaner = Cleaner.create(); + /** + * The general context handle + */ + /* package-private */ final long handle; + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + private final IPcre2 api; + /** + * The cleaner to free the resources + */ + private final Cleaner.Cleanable cleanable; + + /** + * Create a new general context using system default memory management functions + */ + public Pcre2GeneralContext() { + final var api = Pcre4j.api(); + + final var handle = api.generalContextCreate(0, 0, 0); + if (handle == 0) { + throw new IllegalStateException("Failed to create general context"); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Pcre2GeneralContext.Clean(api, handle)); + } + + private record Clean(IPcre2 api, long generalContext) implements Runnable { + @Override + public void run() { + api.generalContextFree(generalContext); + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2JitOption.java b/lib/src/main/java/org/pcre4j/Pcre2JitOption.java new file mode 100644 index 0000000..63c1da6 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2JitOption.java @@ -0,0 +1,77 @@ +/* + * Copyright 
(C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +public enum Pcre2JitOption { + /** + * Compile code for full matching + */ + COMPLETE(IPcre2.JIT_COMPLETE), + + /** + * Compile code for soft partial matching + */ + PARTIAL_SOFT(IPcre2.JIT_PARTIAL_SOFT), + + /** + * Compile code for hard partial matching + */ + PARTIAL_HARD(IPcre2.JIT_PARTIAL_HARD), + + /** + * @deprecated Use {@link Pcre2CompileOption#MATCH_INVALID_UTF} + */ + @Deprecated INVALID_UTF(IPcre2.JIT_INVALID_UTF); + + /** + * The integer value of the option + */ + private final int value; + + /** + * Create a new enum value for the given option value. + * + * @param value the integer value of the option + */ + private Pcre2JitOption(int value) { + this.value = value; + } + + /** + * Get the enum value by its option value. + * + * @param value the integer value of the option + * @return the flag + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(flag -> flag.value == value) + .findFirst(); + } + + /** + * Get the option value of the enum value. 
+ * + * @return the integer value of the option + */ + public int value() { + return value; + } +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java b/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java new file mode 100644 index 0000000..88423d2 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; + +public class Pcre2MatchContext { + + private static final Cleaner cleaner = Cleaner.create(); + /** + * The match context handle + */ + /* package-private */ final long handle; + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + private final IPcre2 api; + /** + * The cleaner to free the resources + */ + private final Cleaner.Cleanable cleanable; + + /** + * Create a new match context + * + * @param generalContext the general context to use or {@code null} to use the default context + */ + public Pcre2MatchContext(Pcre2GeneralContext generalContext) { + final var api = Pcre4j.api(); + + final var handle = api.matchContextCreate( + generalContext != null ? 
generalContext.handle : 0 + ); + if (handle == 0) { + throw new IllegalStateException("Failed to create match context"); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Pcre2MatchContext.Clean(api, handle)); + } + + private record Clean(IPcre2 api, long matchContext) implements Runnable { + @Override + public void run() { + api.matchContextFree(matchContext); + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2MatchData.java b/lib/src/main/java/org/pcre4j/Pcre2MatchData.java new file mode 100644 index 0000000..1430cbc --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2MatchData.java @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; + +/** + * The match data where the results of the match are stored + */ +public class Pcre2MatchData { + + private static final Cleaner cleaner = Cleaner.create(); + /** + * The match data handle + */ + /* package-private */ final long handle; + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + private final IPcre2 api; + /** + * The cleaner to free the resources + */ + private final Cleaner.Cleanable cleanable; + + /** + * Create a new match data object + * + * @param ovecsize the size of the output vector + */ + public Pcre2MatchData(int ovecsize) { + final var api = Pcre4j.api(); + + final var handle = api.matchDataCreate( + ovecsize, + 0 + ); + if (handle == 0) { + throw new IllegalStateException("Failed to create match data"); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Pcre2MatchData.Clean(api, handle)); + } + + /** + * Create a new match data object + * + * @param code the compiled pattern to create the match data for + */ + public Pcre2MatchData(Pcre2Code code) { + final var api = Pcre4j.api(); + + final var handle = api.matchDataCreateFromPattern( + code.handle, + 0 + ); + if (handle == 0) { + throw new IllegalStateException("Failed to create match data from pattern"); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Pcre2MatchData.Clean(api, handle)); + } + + /** + * Get number of the offset pairs in the output vector + * + * @return the number of the offset pairs in the output vector + */ + public int ovectorCount() { + return api.getOvectorCount(handle); + } + + /** + * Get the output vector offset pairs + * + * @return the output vector offset pairs + */ + public OffsetPair[] ovector() { + final var count = ovectorCount(); + final var offsets = new long[count * 2]; + api.getOvector(handle, offsets); + + final var ovector = new 
OffsetPair[count]; + for (int pairIndex = 0; pairIndex < count; pairIndex++) { + ovector[pairIndex] = new OffsetPair( + (int) offsets[pairIndex * 2], + (int) offsets[pairIndex * 2 + 1] + ); + } + return ovector; + } + + /** + * The output vector offset pair + * + * @param start the start offset in the subject string + * @param end the end offset in the subject string + */ + public record OffsetPair(int start, int end) { + } + + private record Clean(IPcre2 api, long matchData) implements Runnable { + @Override + public void run() { + api.matchDataFree(matchData); + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2MatchOption.java b/lib/src/main/java/org/pcre4j/Pcre2MatchOption.java new file mode 100644 index 0000000..776d9ac --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2MatchOption.java @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +/** + * Match options for {@link Pcre2Code#match} + */ +public enum Pcre2MatchOption { + /** + * Match only at the first position + */ + ANCHORED(IPcre2.ANCHORED), + + /** + * On success, make a private subject copy + */ + COPY_MATCHED_SUBJECT(IPcre2.COPY_MATCHED_SUBJECT), + + /** + * Pattern can match only at end of subject + */ + ENDANCHORED(IPcre2.ENDANCHORED), + + /** + * Subject string is not the beginning of a line + */ + NOTBOL(IPcre2.NOTBOL), + + /** + * Subject string is not the end of a line + */ + NOTEOL(IPcre2.NOTEOL), + + /** + * An empty string is not a valid match + */ + NOTEMPTY(IPcre2.NOTEMPTY), + + /** + * An empty string at the start of the subject is not a valid match + */ + NOTEMPTY_ATSTART(IPcre2.NOTEMPTY_ATSTART), + + /** + * Do not use JIT matching + */ + NO_JIT(IPcre2.NO_JIT), + + /** + * Do not check the subject for UTF validity (only relevant if PCRE2_UTF was set at compile time) + */ + NO_UTF_CHECK(IPcre2.NO_UTF_CHECK), + + /** + * Return {@link IPcre2#ERROR_PARTIAL} for a partial match even if there is a full match + */ + PARTIAL_HARD(IPcre2.PARTIAL_HARD), + + /** + * Return {@link IPcre2#ERROR_PARTIAL} for a partial match if no full matches are found + */ + PARTIAL_SOFT(IPcre2.PARTIAL_SOFT); + + /** + * The integer value of the option + */ + private final int value; + + /** + * Create a new enum value for the given option value. + * + * @param value the integer value of the option + */ + private Pcre2MatchOption(int value) { + this.value = value; + } + + /** + * Get the enum value by its option value. + * + * @param value the integer value of the option + * @return the flag + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(flag -> flag.value == value) + .findFirst(); + } + + /** + * Get the option value of the enum value. 
+ * + * @return the integer value of the option + */ + public int value() { + return value; + } +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2Newline.java b/lib/src/main/java/org/pcre4j/Pcre2Newline.java new file mode 100644 index 0000000..9905544 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2Newline.java @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +public enum Pcre2Newline { + NEWLINE_CR(IPcre2.NEWLINE_CR), + NEWLINE_LF(IPcre2.NEWLINE_LF), + NEWLINE_CRLF(IPcre2.NEWLINE_CRLF), + NEWLINE_ANY(IPcre2.NEWLINE_ANY), + NEWLINE_ANYCRLF(IPcre2.NEWLINE_ANYCRLF), + NEWLINE_NUL(IPcre2.NEWLINE_NUL); + + /** + * The integer value + */ + private final int value; + + /** + * Create an enum entry with the given integer value. + * + * @param value the integer value + */ + private Pcre2Newline(int value) { + this.value = value; + } + + /** + * Get the enum entry by its integer value. + * + * @param value the integer value + * @return the enum entry + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(entry -> entry.value == value) + .findFirst(); + } + + /** + * Get the integer value. 
+ * + * @return the integer value + */ + public int value() { + return value; + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2PatternInfo.java b/lib/src/main/java/org/pcre4j/Pcre2PatternInfo.java new file mode 100644 index 0000000..781c207 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2PatternInfo.java @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.util.Arrays; +import java.util.Optional; + +public enum Pcre2PatternInfo { + /** + * Final options after compiling + */ + INFO_ALLOPTIONS(IPcre2.INFO_ALLOPTIONS), + + /** + * Options passed to {@link IPcre2#compile(String, int, int[], long[], long)} + */ + INFO_ARGOPTIONS(IPcre2.INFO_ARGOPTIONS), + + /** + * Number of highest backreference + */ + INFO_BACKREFMAX(IPcre2.INFO_BACKREFMAX), + + /** + * What \R matches: + * PCRE2_BSR_UNICODE: Unicode line endings + * PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only + */ + INFO_BSR(IPcre2.INFO_BSR), + + /** + * Number of capturing subpatterns + */ + INFO_CAPTURECOUNT(IPcre2.INFO_CAPTURECOUNT), + + /** + * First code unit when type is 1 + */ + INFO_FIRSTCODEUNIT(IPcre2.INFO_FIRSTCODEUNIT), + + /** + * Type of start-of-match information + * 0 nothing set + * 1 first code unit is set + * 2 start of string or after newline + */ + INFO_FIRSTCODETYPE(IPcre2.INFO_FIRSTCODETYPE), + + /** + * Bitmap of first code units, or 0 + */ + INFO_FIRSTBITMAP(IPcre2.INFO_FIRSTBITMAP), + + /** + * Return 1 if explicit CR or LF matches exist in the pattern + */ + INFO_HASCRORLF(IPcre2.INFO_HASCRORLF), + + /** + * Return 1 if (?J) or (?-J) was used + */ + INFO_JCHANGED(IPcre2.INFO_JCHANGED), + + /** + * Size of JIT compiled code, or 0 + */ + INFO_JITSIZE(IPcre2.INFO_JITSIZE), + + /** + * Last code unit when type is 1 + */ + INFO_LASTCODEUNIT(IPcre2.INFO_LASTCODEUNIT), + + /** + * Type of must-be-present information + * 0 nothing set + * 1 code unit is set + */ + INFO_LASTCODETYPE(IPcre2.INFO_LASTCODETYPE), + + /** + * 1 if the pattern can match an empty string, 0 otherwise + */ + INFO_MATCHEMPTY(IPcre2.INFO_MATCHEMPTY), + + /** + * Match limit if set, otherwise {@link IPcre2#ERROR_UNSET} + */ + INFO_MATCHLIMIT(IPcre2.INFO_MATCHLIMIT), + + /** + * Length (in characters) of the longest lookbehind assertion + */ + INFO_MAXLOOKBEHIND(IPcre2.INFO_MAXLOOKBEHIND), + + /** + * Lower 
bound length of matching strings + */ + INFO_MINLENGTH(IPcre2.INFO_MINLENGTH), + + /** + * Number of named subpatterns + */ + INFO_NAMECOUNT(IPcre2.INFO_NAMECOUNT), + + /** + * Size of name table entries + */ + INFO_NAMEENTRYSIZE(IPcre2.INFO_NAMEENTRYSIZE), + + /** + * Pointer to name table + */ + INFO_NAMETABLE(IPcre2.INFO_NAMETABLE), + + /** + * Code for the newline sequence: + * {@link IPcre2#NEWLINE_CR} + * {@link IPcre2#NEWLINE_LF} + * {@link IPcre2#NEWLINE_CRLF} + * {@link IPcre2#NEWLINE_ANY} + * {@link IPcre2#NEWLINE_ANYCRLF} + * {@link IPcre2#NEWLINE_NUL} + */ + INFO_NEWLINE(IPcre2.INFO_NEWLINE), + + /** + * Backtracking depth limit if set, otherwise {@link IPcre2#ERROR_UNSET} + */ + INFO_DEPTHLIMIT(IPcre2.INFO_DEPTHLIMIT), + + /** + * Obsolete synonym for {@link #INFO_DEPTHLIMIT} + */ + @Deprecated INFO_RECURSIONLIMIT(IPcre2.INFO_RECURSIONLIMIT), + + /** + * Size of compiled pattern + */ + INFO_SIZE(IPcre2.INFO_SIZE), + + /** + * Return 1 if pattern contains \C + */ + INFO_HASBACKSLASHC(IPcre2.INFO_HASBACKSLASHC), + + /** + * Size of backtracking frame + */ + INFO_FRAMESIZE(IPcre2.INFO_FRAMESIZE), + + /** + * Heap memory limit if set, otherwise {@link IPcre2#ERROR_UNSET} + */ + INFO_HEAPLIMIT(IPcre2.INFO_HEAPLIMIT), + + /** + * Extra options that were passed in the compile context + */ + INFO_EXTRAOPTIONS(IPcre2.INFO_EXTRAOPTIONS); + + /** + * The integer value + */ + private final int value; + + /** + * Create an enum entry with the given integer value. + * + * @param value the integer value + */ + private Pcre2PatternInfo(int value) { + this.value = value; + } + + /** + * Get the enum entry by its integer value. + * + * @param value the integer value + * @return the enum entry + */ + public static Optional valueOf(int value) { + return Arrays.stream(values()) + .filter(entry -> entry.value == value) + .findFirst(); + } + + /** + * Get the integer value. 
+ * + * @return the integer value + */ + public int value() { + return value; + } +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2PatternInfoSizeError.java b/lib/src/main/java/org/pcre4j/Pcre2PatternInfoSizeError.java new file mode 100644 index 0000000..348cf3c --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2PatternInfoSizeError.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +public class Pcre2PatternInfoSizeError extends RuntimeException { + + public Pcre2PatternInfoSizeError(Pcre2PatternInfo info, long size) { + this(info, size, null); + } + + public Pcre2PatternInfoSizeError(Pcre2PatternInfo info, long size, Throwable cause) { + super("Unexpected size of %d bytes for %s".formatted(size, info), cause); + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre4j.java b/lib/src/main/java/org/pcre4j/Pcre4j.java new file mode 100644 index 0000000..3f8944e --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre4j.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + + +public final class Pcre4j { + + private static final Object lock = new Object(); + private static IPcre2 api = null; + + private Pcre4j() { + } + + /** + * Setup the Pcre4j. + * + * @param api the API to use + */ + public static void setup(IPcre2 api) { + if (api == null) { + throw new IllegalArgumentException("api must not be null"); + } + + synchronized (lock) { + Pcre4j.api = api; + } + } + + /** + * Get the API. + * + * @return the API + */ + public static IPcre2 api() { + synchronized (lock) { + if (api == null) { + throw new IllegalStateException("Call Pcre4j.setup() first."); + } + return api; + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre4jUtils.java b/lib/src/main/java/org/pcre4j/Pcre4jUtils.java new file mode 100644 index 0000000..df58fa5 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre4jUtils.java @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +public final class Pcre4jUtils { + + /** + * Private constructor to prevent instantiation. + */ + private Pcre4jUtils() { + } + + /** + * Get the error message for the given error code. + * + * @param api the PCRE2 API + * @param errorcode the error code + * @return the error message + */ + public static String getErrorMessage(IPcre2 api, int errorcode) { + if (api == null) { + throw new IllegalArgumentException("api must not be null"); + } + + var buffer = ByteBuffer.allocateDirect(256); + while (true) { + final var size = api.getErrorMessage(errorcode, buffer); + if (size == IPcre2.ERROR_NOMEMORY) { + buffer = ByteBuffer.allocateDirect(buffer.capacity() * 2); + continue; + } else if (size < 0) { + throw new IllegalStateException("Error getting error message: %d".formatted(size)); + } + + return StandardCharsets.UTF_8.decode(buffer.slice(0, size)).toString(); + } + } + + /** + * Get the group names for the given code. 
+ * + * @param code the PCRE2 compiled pattern + * @return an array where the index is the group number and the value is the group name or {@code null} if the group + * has no name + */ + public static String[] getGroupNames(Pcre2Code code) { + if (code == null) { + throw new IllegalArgumentException("code must not be null"); + } + + final var groupNames = new String[code.captureCount()]; + for (var nameTableEntry : code.nameTable()) { + groupNames[nameTableEntry.group() - 1] = nameTableEntry.name(); + } + + return groupNames; + } + + /** + * Get the match groups + * + * @param code the compiled pattern the match was performed with + * @param subject the subject string the match was performed against + * @param matchData the match data with the match results + * @return an array of strings where the index is the group number and the value is the matched group or + * {@code null} + */ + public static String[] getMatchGroups(Pcre2Code code, String subject, Pcre2MatchData matchData) { + if (code == null) { + throw new IllegalArgumentException("code must not be null"); + } + if (subject == null) { + throw new IllegalArgumentException("subject must not be null"); + } + if (matchData == null) { + throw new IllegalArgumentException("matchData must not be null"); + } + + final var ovector = matchData.ovector(); + final var matchGroups = new String[ovector.length]; + for (var matchIndex = 0; matchIndex < ovector.length; matchIndex++) { + final var match = ovector[matchIndex]; + matchGroups[matchIndex] = subject.substring(match.start(), match.end()); + } + return matchGroups; + } + + /** + * Get the match named groups + * + * @param code the compiled pattern the match was performed with + * @param subject the subject string the match was performed against + * @param matchData the match data with the match results + * @return a map of group names to the matched group or {@code null} + */ + public static Map getNamedMatchGroups(Pcre2Code code, String subject, Pcre2MatchData 
matchData) { + if (code == null) { + throw new IllegalArgumentException("code must not be null"); + } + if (subject == null) { + throw new IllegalArgumentException("subject must not be null"); + } + if (matchData == null) { + throw new IllegalArgumentException("matchData must not be null"); + } + + final var groupNames = getGroupNames(code); + final var ovector = matchData.ovector(); + final var matchGroups = new HashMap(); + for (var matchIndex = 1; matchIndex < ovector.length; matchIndex++) { + final var match = ovector[matchIndex]; + final var groupName = groupNames[matchIndex - 1]; + if (groupName != null) { + matchGroups.put(groupName, subject.substring(match.start(), match.end())); + } + } + return matchGroups; + } +} diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..2c0c66e --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +rootProject.name = "pcre4j" + +include(":api") +include(":lib") +include(":test") +include(":ffm") +include(":jna") diff --git a/test/build.gradle.kts b/test/build.gradle.kts new file mode 100644 index 0000000..020cba6 --- /dev/null +++ b/test/build.gradle.kts @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. 
plugins {
    `java-library`
}

repositories {
    mavenCentral()
}

dependencies {
    implementation(project(":lib"))
    // The reusable test classes of this module live in the main source set (src/main/java) and use JUnit
    // annotations and assertions, so JUnit is required on the main compile classpath as well
    implementation("org.junit.jupiter:junit-jupiter:5.10.2")
    testImplementation("org.junit.jupiter:junit-jupiter:5.10.2")
    testRuntimeOnly("org.junit.platform:junit-platform-launcher")
}

configurations {
    implementation {
        // Fail the build instead of silently picking a version when dependencies disagree
        resolutionStrategy.failOnVersionConflict()
    }
}

sourceSets {
    main {
        java.srcDir("src/main/java")
    }
}

java {
    sourceCompatibility = JavaVersion.VERSION_21
    targetCompatibility = JavaVersion.VERSION_21

    toolchain {
        languageVersion = JavaLanguageVersion.of(21)
    }
}

// The task type must be given explicitly: without it the Kotlin DSL can neither infer the type argument of
// withType nor resolve useJUnitPlatform() inside the configuration block
tasks.withType<Test> {
    useJUnitPlatform()
}
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + */ +package org.pcre4j.test; + +import org.junit.jupiter.api.Test; +import org.pcre4j.Pcre2Code; +import org.pcre4j.Pcre2CompileOption; +import org.pcre4j.Pcre2MatchData; +import org.pcre4j.Pcre2MatchOption; + +import java.util.EnumSet; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public abstract class Pcre2Tests { + + @Test + public void plainStringMatch() { + final var code = new Pcre2Code( + "42", + EnumSet.noneOf(Pcre2CompileOption.class), + null + ); + final var matchData = new Pcre2MatchData(code); + + final var result = code.match( + "42", + 0, + EnumSet.noneOf(Pcre2MatchOption.class), + matchData, + null + ); + assertEquals(1, result); + + final var ovector = matchData.ovector(); + assertArrayEquals(new Pcre2MatchData.OffsetPair[]{ + new Pcre2MatchData.OffsetPair(0, 2), + }, ovector); + } + + @Test + public void plainStringMatchNoCapture() { + final var code = new Pcre2Code( + "(?:42)", + EnumSet.noneOf(Pcre2CompileOption.class), + null + ); + final var matchData = new Pcre2MatchData(code); + + final var result = code.match( + "42", + 0, + EnumSet.noneOf(Pcre2MatchOption.class), + matchData, + null + ); + assertEquals(1, result); + + final var ovector = matchData.ovector(); + assertArrayEquals(new Pcre2MatchData.OffsetPair[]{ + new Pcre2MatchData.OffsetPair(0, 2), + }, ovector); + } + + @Test + public void plainStringMatchCapture() { + final var code = new Pcre2Code( + "(42)", + EnumSet.noneOf(Pcre2CompileOption.class), + null + ); + final var matchData = new Pcre2MatchData(code); + + final var 
result = code.match( + "42", + 0, + EnumSet.noneOf(Pcre2MatchOption.class), + matchData, + null + ); + assertEquals(2, result); + + final var ovector = matchData.ovector(); + assertArrayEquals(new Pcre2MatchData.OffsetPair[]{ + new Pcre2MatchData.OffsetPair(0, 2), + new Pcre2MatchData.OffsetPair(0, 2), + }, ovector); + } + + @Test + public void plainStringMatchNamedCapture() { + final var code = new Pcre2Code( + "(?P42)", + EnumSet.noneOf(Pcre2CompileOption.class), + null + ); + final var matchData = new Pcre2MatchData(code); + + final var result = code.match( + "42", + 0, + EnumSet.noneOf(Pcre2MatchOption.class), + matchData, + null + ); + assertEquals(2, result); + + final var ovector = matchData.ovector(); + assertArrayEquals(new Pcre2MatchData.OffsetPair[]{ + new Pcre2MatchData.OffsetPair(0, 2), + new Pcre2MatchData.OffsetPair(0, 2), + }, ovector); + } + +}