Skip to content

Commit

Permalink
Copy ahocorasick code to project
Browse files Browse the repository at this point in the history
  • Loading branch information
ismartcoding committed Jul 2, 2024
1 parent 5f84ed3 commit 50c0eed
Show file tree
Hide file tree
Showing 40 changed files with 1,826 additions and 21 deletions.
Binary file not shown.
Binary file not shown.
26 changes: 12 additions & 14 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
[versions]
ahocorasickVersion="0.6.3"
androidGifDrawableVersion="1.2.23"
androidsvgAarVersion="1.4"
appcompatVersion="1.6.1"
appcompatVersion="1.7.0"
bcprovJdk15onVersion="1.70"
coil="3.0.0-alpha06"
coreKtxVersion="1.13.1"
exoplayerVersion="2.19.1"
firebaseCrashlyticsGradleVersion="2.9.9"
fragmentKtxVersion="1.7.0"
firebaseCrashlyticsGradleVersion="3.0.1"
fragmentKtxVersion="1.8.0"
glideVersion="4.16.0"
googleServicesVersion="4.4.1"
googleServicesVersion="4.4.2"
gradle= "8.3.2"
accompanistDrawablepainterVersion = "0.34.0"
cameraCoreVersion = "1.4.0-alpha04"
cameraCoreVersion = "1.4.0-beta02"
apollo = "3.2.1"
compose = "1.6.7"
compose = "1.6.8"
activityComposeVersion = "1.9.0"
composeBom = "2024.05.00"
composeBom = "2024.06.00"
coreSplashscreenVersion = "1.0.1"
coreVersion = "3.5.3"
datastorePreferencesVersion = "1.1.1"
firebaseCrashlyticsKtxVersion = "18.6.3"
firebaseBomVersion = "32.8.0"
firebaseCrashlyticsKtxVersion = "19.0.1"
firebaseBomVersion = "33.1.0"
gsonVersion="2.10.1"
guavaVersion="33.0.0-jre"
jsoupVersion="1.15.3"
Expand All @@ -34,12 +33,12 @@ kotlinxCoroutinesVersion ="1.8.1"
ktor = "3.0.0-beta-1"
leakcanaryAndroidVersion = "2.12"
lifecycleExtensionsVersion="2.2.0"
lifecycleViewmodelKtxVersion="2.8.0"
lifecycleViewmodelKtxVersion="2.8.2"
markwon="4.6.2"
materialVersion="1.12.0"
media3 = "1.3.1"
navigationComposeVersion = "2.7.7"
materialIconsExtendedVersion = "1.7.0-alpha05"
materialIconsExtendedVersion = "1.7.0-beta03"
material3Version = "1.2.1"
okhttpVersion="4.12.0"
openaiClientVersion = "3.6.2"
Expand All @@ -50,14 +49,13 @@ room = "2.6.1"
snakeYamlVersion = "v1.18-android"
subsamplingScaleImageViewAndroidxVersion="3.10.0"
transitionVersion="1.5.0"
viewpager2Version = "1.0.0"
viewpager2Version = "1.1.0"
workRuntimeKtxVersion = "2.9.0"
ztZipVersion = "1.16"
devtoolsKspVersion = "1.9.23-1.0.20"
pagingVersion = "3.3.0"

[libraries]
ahocorasick = { module = "org.ahocorasick:ahocorasick", version.ref = "ahocorasickVersion" }
android-gif-drawable = { module = "pl.droidsonroids.gif:android-gif-drawable", version.ref = "androidGifDrawableVersion" }
androidsvg-aar = { module = "com.caverock:androidsvg-aar", version.ref = "androidsvgAarVersion" }
androidx-appcompat = { module = "androidx.appcompat:appcompat", version.ref = "appcompatVersion" }
Expand Down
1 change: 0 additions & 1 deletion lib/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ dependencies {
// https://github.com/davemorrissey/subsampling-scale-image-view
api(libs.subsampling.scale.image.view.androidx)

implementation(libs.ahocorasick) // For pinyin
implementation(libs.bcprov.jdk15on)
implementation(libs.bcpkix.jdk15on)
api(libs.ktor.client.core)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package com.ismartcoding.lib.ahocorasick.interval;


/**
 * Immutable pair of start and end text offsets, reused by
 * both {@link Emit} and {@link PayloadEmit}.
 *
 * <p>The end offset is treated as inclusive: {@link #size()} returns
 * {@code end - start + 1} and the overlap checks use {@code <=} / {@code >=}.
 * The constructor performs no validation of the two bounds.
 */
public class Interval implements Intervalable {

    private final int start;
    private final int end;

    /**
     * Constructs an interval with a start and end position.
     *
     * @param start The interval's starting text position.
     * @param end   The interval's ending text position (inclusive).
     */
    public Interval(final int start, final int end) {
        this.start = start;
        this.end = end;
    }

    /**
     * Returns the starting offset into the text for this interval.
     *
     * @return The value passed as {@code start} to the constructor.
     */
    @Override
    public int getStart() {
        return this.start;
    }

    /**
     * Returns the ending offset into the text for this interval.
     *
     * @return The value passed as {@code end} to the constructor.
     */
    @Override
    public int getEnd() {
        return this.end;
    }

    /**
     * Returns the length of the interval.
     *
     * @return The end position less the start position, plus one.
     */
    @Override
    public int size() {
        return end - start + 1;
    }

    /**
     * Answers whether the given interval overlaps this interval
     * instance. Touching bounds count as an overlap.
     *
     * @param other the other interval to check for overlap
     * @return true if the intervals share at least one position.
     */
    public boolean overlapsWith(final Interval other) {
        return this.start <= other.getEnd() &&
                this.end >= other.getStart();
    }

    /**
     * Answers whether a single position lies inside this interval.
     *
     * @param point the text position to test
     * @return true if {@code start <= point <= end}.
     */
    public boolean overlapsWith(int point) {
        return this.start <= point && point <= this.end;
    }

    /**
     * Two intervals are equal when both bounds match. Any
     * {@link Intervalable} is accepted, not only {@code Interval}.
     *
     * @param o the object to compare against
     * @return true if {@code o} is an {@link Intervalable} with the same bounds.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Intervalable)) {
            return false;
        }
        Intervalable other = (Intervalable) o;
        return this.start == other.getStart() &&
                this.end == other.getEnd();
    }

    /**
     * Hash derived from both bounds. Equal intervals (same start and end)
     * always produce the same value, as required by {@link #equals(Object)}.
     *
     * @return a hash code combining start and end.
     */
    @Override
    public int hashCode() {
        // Asymmetric mix so that (a, b) and (b, a) do not collide, and so
        // that bounds >= 100 still contribute distinct hash values.
        return 31 * this.start + this.end;
    }

    /**
     * Orders intervals by start offset, then by end offset.
     *
     * @param o the object to compare against
     * @return a negative, zero or positive value following the usual
     *         {@link Comparable} convention; {@code -1} when {@code o}
     *         is not an {@link Intervalable}.
     */
    @Override
    public int compareTo(Object o) {
        if (!(o instanceof Intervalable)) {
            return -1;
        }
        Intervalable other = (Intervalable) o;
        // Integer.compare instead of subtraction: a difference of two ints
        // can overflow and report the wrong sign.
        int comparison = Integer.compare(this.start, other.getStart());
        return comparison != 0
                ? comparison
                : Integer.compare(this.end, other.getEnd());
    }

    /**
     * Returns the starting offset and ending offset separated
     * by a full colon (:).
     *
     * @return A non-null String, never empty.
     */
    @Override
    public String toString() {
        return this.start + ":" + this.end;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package com.ismartcoding.lib.ahocorasick.interval;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * A node of the interval tree. Each node picks a split point, keeps the
 * intervals that contain that point, and pushes intervals lying entirely
 * before or after the point into a left or right child node.
 */
public class IntervalNode {

    /** Which side of the split point a query interval lies on. */
    private enum Direction {LEFT, RIGHT}

    private IntervalNode left;
    private IntervalNode right;
    private int point;
    private List<Intervalable> intervals = new ArrayList<>();

    /**
     * Builds this node and, recursively, its children.
     *
     * @param intervals the intervals to distribute over this subtree
     */
    public IntervalNode(final List<Intervalable> intervals) {
        this.point = determineMedian(intervals);

        final List<Intervalable> before = new ArrayList<>();
        final List<Intervalable> after = new ArrayList<>();

        for (final Intervalable candidate : intervals) {
            if (candidate.getEnd() < this.point) {
                // Ends before the split point: belongs to the left subtree.
                before.add(candidate);
                continue;
            }
            if (candidate.getStart() > this.point) {
                // Starts after the split point: belongs to the right subtree.
                after.add(candidate);
                continue;
            }
            // Contains the split point: stored on this node.
            this.intervals.add(candidate);
        }

        if (!before.isEmpty()) {
            this.left = new IntervalNode(before);
        }
        if (!after.isEmpty()) {
            this.right = new IntervalNode(after);
        }
    }

    /**
     * Computes the split point as the midpoint between the smallest start
     * and the largest end among the given intervals.
     *
     * @param intervals the intervals to inspect
     * @return the midpoint (integer division); {@code -1} for an empty list
     */
    public int determineMedian(final List<Intervalable> intervals) {
        // -1 doubles as the "not set yet" marker for both bounds.
        int lowest = -1;
        int highest = -1;
        for (final Intervalable candidate : intervals) {
            final int candidateStart = candidate.getStart();
            final int candidateEnd = candidate.getEnd();
            lowest = (lowest == -1 || candidateStart < lowest) ? candidateStart : lowest;
            highest = (highest == -1 || candidateEnd > highest) ? candidateEnd : highest;
        }
        return (lowest + highest) / 2;
    }

    /**
     * Collects the intervals in this subtree that overlap the given one,
     * leaving out any interval equal to the query itself.
     *
     * @param interval the query interval
     * @return a new list of overlapping intervals
     */
    public List<Intervalable> findOverlaps(final Intervalable interval) {
        final List<Intervalable> found = new ArrayList<>();

        if (this.point < interval.getStart()) {
            // Query lies entirely right of the split point.
            addToOverlaps(interval, found, findOverlappingRanges(this.right, interval));
            addToOverlaps(interval, found, checkForOverlapsToTheRight(interval));
            return found;
        }

        if (this.point > interval.getEnd()) {
            // Query lies entirely left of the split point.
            addToOverlaps(interval, found, findOverlappingRanges(this.left, interval));
            addToOverlaps(interval, found, checkForOverlapsToTheLeft(interval));
            return found;
        }

        // Query contains the split point: everything stored here overlaps,
        // and both subtrees may contribute as well.
        addToOverlaps(interval, found, this.intervals);
        addToOverlaps(interval, found, findOverlappingRanges(this.left, interval));
        addToOverlaps(interval, found, findOverlappingRanges(this.right, interval));
        return found;
    }

    /**
     * Appends every element of {@code newOverlaps} to {@code overlaps},
     * skipping elements that are equal to the query interval.
     *
     * @param interval    the query interval to exclude
     * @param overlaps    the accumulator that receives the results
     * @param newOverlaps the candidates to append
     */
    protected void addToOverlaps(
            final Intervalable interval,
            final List<Intervalable> overlaps,
            final List<Intervalable> newOverlaps) {
        for (final Intervalable candidate : newOverlaps) {
            if (candidate.equals(interval)) {
                continue;
            }
            overlaps.add(candidate);
        }
    }

    /**
     * Finds intervals stored on this node that reach a query lying left
     * of the split point.
     *
     * @param interval the query interval
     * @return a new list of matching intervals
     */
    protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) {
        return checkForOverlaps(interval, Direction.LEFT);
    }

    /**
     * Finds intervals stored on this node that reach a query lying right
     * of the split point.
     *
     * @param interval the query interval
     * @return a new list of matching intervals
     */
    protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) {
        return checkForOverlaps(interval, Direction.RIGHT);
    }

    /**
     * Scans the intervals stored on this node and keeps those that extend
     * far enough in the given direction to touch the query interval.
     *
     * @param interval  the query interval
     * @param direction the side of the split point the query lies on
     * @return a new list of matching intervals
     */
    protected List<Intervalable> checkForOverlaps(
            final Intervalable interval, final Direction direction) {
        final List<Intervalable> matches = new ArrayList<>();

        for (final Intervalable candidate : this.intervals) {
            final boolean reachesQuery;
            switch (direction) {
                case LEFT:
                    reachesQuery = candidate.getStart() <= interval.getEnd();
                    break;
                case RIGHT:
                    reachesQuery = candidate.getEnd() >= interval.getStart();
                    break;
                default:
                    reachesQuery = false;
                    break;
            }
            if (reachesQuery) {
                matches.add(candidate);
            }
        }

        return matches;
    }

    /**
     * Delegates the overlap query to a child node, tolerating a missing child.
     *
     * @param node     the child node, possibly {@code null}
     * @param interval the query interval
     * @return the child's overlaps, or an empty list when there is no child
     */
    protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
        if (node == null) {
            return Collections.<Intervalable>emptyList();
        }
        return node.findOverlaps(interval);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package com.ismartcoding.lib.ahocorasick.interval;

import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import static java.util.Collections.sort;

/**
 * Wraps a tree of {@link IntervalNode}s built from a list of intervals
 * and offers overlap queries on top of it.
 */
public class IntervalTree {

    private final IntervalNode rootNode;

    /**
     * Builds the tree from the given intervals.
     *
     * @param intervals the intervals to index
     */
    public IntervalTree(List<Intervalable> intervals) {
        this.rootNode = new IntervalNode(intervals);
    }

    /**
     * Removes overlapping intervals from the given list. The list is
     * modified in place and the same instance is returned, sorted with
     * {@link IntervalableComparatorByPosition}.
     *
     * @param intervals the list to prune; must support sorting and removal
     * @return the same list, with overlapping intervals removed
     */
    public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) {

        // Visit order decides which interval of an overlapping group is kept
        // (presumably largest first, then left-most; see the comparator).
        sort(intervals, new IntervalableComparatorBySize());

        final Set<Intervalable> discarded = new TreeSet<>();

        for (final Intervalable candidate : intervals) {
            // Only intervals that are still kept may knock out their
            // overlapping neighbours.
            if (!discarded.contains(candidate)) {
                discarded.addAll(findOverlaps(candidate));
            }
        }

        // Drop everything that was marked as overlapping.
        for (final Intervalable victim : discarded) {
            intervals.remove(victim);
        }

        // Hand the survivors back ordered by position.
        sort(intervals, new IntervalableComparatorByPosition());

        return intervals;
    }

    /**
     * Finds the indexed intervals that overlap the given one.
     *
     * @param interval the query interval
     * @return the overlaps reported by the root node
     */
    public List<Intervalable> findOverlaps(final Intervalable interval) {
        return rootNode.findOverlaps(interval);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.ismartcoding.lib.ahocorasick.interval;

/**
 * Contract for anything that occupies a range of positions and can be
 * ordered against other objects.
 *
 * <p>The raw {@link Comparable} supertype is used, so implementations
 * receive a plain {@code Object} in {@code compareTo}.
 */
public interface Intervalable extends Comparable {

    /**
     * @return the length of the range
     */
    int size();

    /**
     * @return the position at which the range ends
     */
    int getEnd();

    /**
     * @return the position at which the range starts
     */
    int getStart();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.ismartcoding.lib.ahocorasick.interval;

import java.util.Comparator;

/**
 * Orders {@link Intervalable}s by ascending start position only;
 * intervals with the same start compare as equal.
 */
public class IntervalableComparatorByPosition implements Comparator<Intervalable> {

    /**
     * Compares two intervals by their start positions.
     *
     * @param intervalable  the first interval
     * @param intervalable2 the second interval
     * @return a negative, zero or positive value when the first interval
     *         starts before, at, or after the second one
     */
    @Override
    public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
        // Integer.compare avoids the overflow that plain subtraction can
        // produce for operands of opposite sign and large magnitude.
        return Integer.compare(intervalable.getStart(), intervalable2.getStart());
    }

}
Loading

0 comments on commit 50c0eed

Please sign in to comment.