-
-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5f84ed3
commit 50c0eed
Showing
40 changed files
with
1,826 additions
and
21 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
105 changes: 105 additions & 0 deletions
105
lib/src/main/java/com/ismartcoding/lib/ahocorasick/interval/Interval.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package com.ismartcoding.lib.ahocorasick.interval; | ||
|
||
|
||
/** | ||
* Responsible for tracking the start and end bounds, which are reused by | ||
* both {@link Emit} and {@link PayloadEmit}. | ||
*/ | ||
public class Interval implements Intervalable { | ||
|
||
private final int start; | ||
private final int end; | ||
|
||
/** | ||
* Constructs an interval with a start and end position. | ||
* | ||
* @param start The interval's starting text position. | ||
* @param end The interval's ending text position. | ||
*/ | ||
public Interval(final int start, final int end) { | ||
this.start = start; | ||
this.end = end; | ||
} | ||
|
||
/** | ||
* Returns the starting offset into the text for this interval. | ||
* | ||
* @return A number between 0 (start of text) and the text length. | ||
*/ | ||
@Override | ||
public int getStart() { | ||
return this.start; | ||
} | ||
|
||
/** | ||
* Returns the ending offset into the text for this interval. | ||
* | ||
* @return A number between getStart() + 1 and the text length. | ||
*/ | ||
@Override | ||
public int getEnd() { | ||
return this.end; | ||
} | ||
|
||
/** | ||
* Returns the length of the interval. | ||
* | ||
* @return The end position less the start position, plus one. | ||
*/ | ||
@Override | ||
public int size() { | ||
return end - start + 1; | ||
} | ||
|
||
/** | ||
* Answers whether the given interval overlaps this interval | ||
* instance. | ||
* | ||
* @param other the other interval to check for overlap | ||
* @return true The intervals overlap. | ||
*/ | ||
public boolean overlapsWith(final Interval other) { | ||
return this.start <= other.getEnd() && | ||
this.end >= other.getStart(); | ||
} | ||
|
||
public boolean overlapsWith(int point) { | ||
return this.start <= point && point <= this.end; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (!(o instanceof Intervalable)) { | ||
return false; | ||
} | ||
Intervalable other = (Intervalable) o; | ||
return this.start == other.getStart() && | ||
this.end == other.getEnd(); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return this.start % 100 + this.end % 100; | ||
} | ||
|
||
@Override | ||
public int compareTo(Object o) { | ||
if (!(o instanceof Intervalable)) { | ||
return -1; | ||
} | ||
Intervalable other = (Intervalable) o; | ||
int comparison = this.start - other.getStart(); | ||
return comparison != 0 ? comparison : this.end - other.getEnd(); | ||
} | ||
|
||
/** | ||
* Returns the starting offset and ending offset separated | ||
* by a full colon (:). | ||
* | ||
* @return A non-null String, never empty. | ||
*/ | ||
@Override | ||
public String toString() { | ||
return this.start + ":" + this.end; | ||
} | ||
} |
123 changes: 123 additions & 0 deletions
123
lib/src/main/java/com/ismartcoding/lib/ahocorasick/interval/IntervalNode.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package com.ismartcoding.lib.ahocorasick.interval; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
public class IntervalNode { | ||
|
||
private enum Direction {LEFT, RIGHT} | ||
|
||
private IntervalNode left; | ||
private IntervalNode right; | ||
private int point; | ||
private List<Intervalable> intervals = new ArrayList<>(); | ||
|
||
public IntervalNode(final List<Intervalable> intervals) { | ||
this.point = determineMedian(intervals); | ||
|
||
final List<Intervalable> toLeft = new ArrayList<>(); | ||
final List<Intervalable> toRight = new ArrayList<>(); | ||
|
||
for (Intervalable interval : intervals) { | ||
if (interval.getEnd() < this.point) { | ||
toLeft.add(interval); | ||
} else if (interval.getStart() > this.point) { | ||
toRight.add(interval); | ||
} else { | ||
this.intervals.add(interval); | ||
} | ||
} | ||
|
||
if (toLeft.size() > 0) { | ||
this.left = new IntervalNode(toLeft); | ||
} | ||
if (toRight.size() > 0) { | ||
this.right = new IntervalNode(toRight); | ||
} | ||
} | ||
|
||
public int determineMedian(final List<Intervalable> intervals) { | ||
int start = -1; | ||
int end = -1; | ||
for (Intervalable interval : intervals) { | ||
int currentStart = interval.getStart(); | ||
int currentEnd = interval.getEnd(); | ||
if (start == -1 || currentStart < start) { | ||
start = currentStart; | ||
} | ||
if (end == -1 || currentEnd > end) { | ||
end = currentEnd; | ||
} | ||
} | ||
return (start + end) / 2; | ||
} | ||
|
||
public List<Intervalable> findOverlaps(final Intervalable interval) { | ||
final List<Intervalable> overlaps = new ArrayList<>(); | ||
|
||
if (this.point < interval.getStart()) { | ||
// Tends to the right | ||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); | ||
addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval)); | ||
} else if (this.point > interval.getEnd()) { | ||
// Tends to the left | ||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); | ||
addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval)); | ||
} else { | ||
// Somewhere in the middle | ||
addToOverlaps(interval, overlaps, this.intervals); | ||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); | ||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); | ||
} | ||
|
||
return overlaps; | ||
} | ||
|
||
protected void addToOverlaps( | ||
final Intervalable interval, | ||
final List<Intervalable> overlaps, | ||
final List<Intervalable> newOverlaps) { | ||
for (final Intervalable currentInterval : newOverlaps) { | ||
if (!currentInterval.equals(interval)) { | ||
overlaps.add(currentInterval); | ||
} | ||
} | ||
} | ||
|
||
protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) { | ||
return checkForOverlaps(interval, Direction.LEFT); | ||
} | ||
|
||
protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) { | ||
return checkForOverlaps(interval, Direction.RIGHT); | ||
} | ||
|
||
protected List<Intervalable> checkForOverlaps( | ||
final Intervalable interval, final Direction direction) { | ||
final List<Intervalable> overlaps = new ArrayList<>(); | ||
|
||
for (final Intervalable currentInterval : this.intervals) { | ||
switch (direction) { | ||
case LEFT: | ||
if (currentInterval.getStart() <= interval.getEnd()) { | ||
overlaps.add(currentInterval); | ||
} | ||
break; | ||
case RIGHT: | ||
if (currentInterval.getEnd() >= interval.getStart()) { | ||
overlaps.add(currentInterval); | ||
} | ||
break; | ||
} | ||
} | ||
|
||
return overlaps; | ||
} | ||
|
||
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) { | ||
return node == null | ||
? Collections.<Intervalable>emptyList() | ||
: node.findOverlaps(interval); | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
lib/src/main/java/com/ismartcoding/lib/ahocorasick/interval/IntervalTree.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package com.ismartcoding.lib.ahocorasick.interval; | ||
|
||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.TreeSet; | ||
|
||
import static java.util.Collections.sort; | ||
|
||
public class IntervalTree { | ||
|
||
private final IntervalNode rootNode; | ||
|
||
public IntervalTree(List<Intervalable> intervals) { | ||
this.rootNode = new IntervalNode(intervals); | ||
} | ||
|
||
public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) { | ||
|
||
// Sort the intervals on size, then left-most position | ||
sort(intervals, new IntervalableComparatorBySize()); | ||
|
||
final Set<Intervalable> removeIntervals = new TreeSet<>(); | ||
|
||
for (final Intervalable interval : intervals) { | ||
// If the interval was already removed, ignore it | ||
if (removeIntervals.contains(interval)) { | ||
continue; | ||
} | ||
|
||
// Remove all overallping intervals | ||
removeIntervals.addAll(findOverlaps(interval)); | ||
} | ||
|
||
// Remove all intervals that were overlapping | ||
for (final Intervalable removeInterval : removeIntervals) { | ||
intervals.remove(removeInterval); | ||
} | ||
|
||
// Sort the intervals, now on left-most position only | ||
sort(intervals, new IntervalableComparatorByPosition()); | ||
|
||
return intervals; | ||
} | ||
|
||
public List<Intervalable> findOverlaps(final Intervalable interval) { | ||
return rootNode.findOverlaps(interval); | ||
} | ||
|
||
} |
11 changes: 11 additions & 0 deletions
11
lib/src/main/java/com/ismartcoding/lib/ahocorasick/interval/Intervalable.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package com.ismartcoding.lib.ahocorasick.interval; | ||
|
||
/**
 * Contract for anything that occupies a span described by a start and an
 * end position. Implementations are also comparable so that they can be
 * sorted and stored in ordered collections.
 */
public interface Intervalable extends Comparable {

    /**
     * @return the start position of the span
     */
    int getStart();

    /**
     * @return the end position of the span
     */
    int getEnd();

    /**
     * @return the length of the span ({@code Interval} computes it as
     *         {@code end - start + 1})
     */
    int size();
}
12 changes: 12 additions & 0 deletions
12
...main/java/com/ismartcoding/lib/ahocorasick/interval/IntervalableComparatorByPosition.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package com.ismartcoding.lib.ahocorasick.interval; | ||
|
||
import java.util.Comparator; | ||
|
||
public class IntervalableComparatorByPosition implements Comparator<Intervalable> { | ||
|
||
@Override | ||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) { | ||
return intervalable.getStart() - intervalable2.getStart(); | ||
} | ||
|
||
} |
Oops, something went wrong.