Skip to content

Commit

Permalink
[#16] Joining collector: renamings, JavaDoc
Browse files Browse the repository at this point in the history
  • Loading branch information
amaembo committed Nov 8, 2015
1 parent 154704d commit 8d10b3a
Show file tree
Hide file tree
Showing 2 changed files with 291 additions and 75 deletions.
264 changes: 240 additions & 24 deletions src/main/java/javax/util/streamex/Joining.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@
import static javax.util.streamex.StreamExInternals.*;

/**
* An advanced implementation of joining {@link Collector} which is capable to
* join the input {@code CharSequence} elements with given delimiter optionally
* wrapping into given prefix and suffix and optionally limiting the length of
* the resulting string (in UTF-16 chars, codepoints or Unicode symbols) adding
* the specified ellipsis symbol. This collector supercedes the standard JDK
* An advanced implementation of joining {@link Collector}. This collector is
* capable of joining the input {@code CharSequence} elements with the given
* delimiter, optionally wrapping the result into the given prefix and suffix
* and optionally limiting the length of the resulting string (in UTF-16 code
* units, code points or grapheme clusters), adding the specified ellipsis
* sequence when the limit is reached. This collector supersedes the standard JDK
* {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}
* collectors family.
*
Expand All @@ -62,8 +63,8 @@
*
* <p>
* The intermediate accumulation type of this collector is the implementation
* detail and not exposed to the API. If you want to cast it to {@code Collector}
* type, use ? as accumulator type variable:
* detail and is not exposed in the API. If you want to cast it to the
* {@code Collector} type, use {@code ?} as the accumulator type variable:
*
* <pre>{@code
* Collector<CharSequence, ?, String> joining = Joining.with(", ");
Expand All @@ -80,14 +81,14 @@ static final class Accumulator {

private static final int CUT_ANYWHERE = 0;
private static final int CUT_CODEPOINT = 1;
private static final int CUT_SYMBOL = 2;
private static final int CUT_GRAPHEME = 2;
private static final int CUT_WORD = 3;
private static final int CUT_BEFORE_DELIMITER = 4;
private static final int CUT_AFTER_DELIMITER = 5;

private static final int LENGTH_CHARS = 0;
private static final int LENGTH_CODEPOINTS = 1;
private static final int LENGTH_SYMBOLS = 2;
private static final int LENGTH_GRAPHEMES = 2;

private final String delimiter, ellipsis, prefix, suffix;
private final int cutStrategy, lenStrategy, maxLength;
Expand All @@ -103,9 +104,9 @@ private Joining(String delimiter, String ellipsis, String prefix, String suffix,
this.lenStrategy = lenStrategy;
this.maxLength = maxLength;
}

private void init() {
if(delimCount == -1) {
if (delimCount == -1) {
limit = maxLength - length(prefix) - length(suffix);
delimCount = length(delimiter);
}
Expand All @@ -119,7 +120,7 @@ private int length(CharSequence s) {
if (s instanceof String)
return ((String) s).codePointCount(0, s.length());
return (int) s.codePoints().count();
case LENGTH_SYMBOLS:
case LENGTH_GRAPHEMES:
BreakIterator bi = BreakIterator.getCharacterInstance();
bi.setText(s.toString());
int count = 0;
Expand Down Expand Up @@ -149,10 +150,11 @@ private int copyCut(char[] buf, int pos, String str, int limit, int cutStrategy)
if (limit < str.codePointCount(0, str.length()))
endPos = str.offsetByCodePoints(0, limit);
break;
case LENGTH_SYMBOLS:
case LENGTH_GRAPHEMES:
BreakIterator bi = BreakIterator.getCharacterInstance();
bi.setText(str);
int count = limit, end = 0;
int count = limit,
end = 0;
while (true) {
end = bi.next();
if (end == BreakIterator.DONE)
Expand All @@ -178,7 +180,7 @@ private int copyCut(char[] buf, int pos, String str, int limit, int cutStrategy)
bi.setText(str);
endPos = bi.preceding(endPos + 1);
break;
case CUT_SYMBOL:
case CUT_GRAPHEME:
bi = BreakIterator.getCharacterInstance();
bi.setText(str);
endPos = bi.preceding(endPos + 1);
Expand Down Expand Up @@ -220,52 +222,266 @@ private Joining withLimit(int lenStrategy, int maxLength) {
/**
 * Creates a copy of this collector which differs only in the cut strategy.
 *
 * @param cutStrategy
 *            the cut strategy for the copy (callers in this class pass one
 *            of the {@code CUT_*} constants)
 * @return a new {@code Joining} with every other setting carried over from
 *         this instance
 */
private Joining withCut(int cutStrategy) {
    // The parameter shadows the field of the same name, so it is the only
    // unqualified argument; everything else is copied from this instance.
    return new Joining(this.delimiter, this.ellipsis, this.prefix, this.suffix, cutStrategy, this.lenStrategy,
            this.maxLength);
}


/**
* Returns a {@code Collector} that concatenates the input elements,
* separated by the specified delimiter, in encounter order.
*
* <p>
* This collector is similar to {@link Collectors#joining(CharSequence)},
* but can be further set up in a flexible way, for example, specifying the
* maximal allowed length of the resulting {@code String}.
*
* @param delimiter
* the delimiter to be used between each element
* @return A {@code Collector} which concatenates CharSequence elements,
* separated by the specified delimiter, in encounter order
* @see Collectors#joining(CharSequence)
*/
public static Joining with(CharSequence delimiter) {
return new Joining(delimiter.toString(), "...", "", "", CUT_CODEPOINT, LENGTH_CHARS, -1);
return new Joining(delimiter.toString(), "...", "", "", CUT_GRAPHEME, LENGTH_CHARS, -1);
}

/**
* Returns a {@code Collector} which behaves like this collector, but
* additionally wraps the result with the specified prefix and suffix.
*
* <p>
* The collector returned by
* {@code Joining.with(delimiter).wrap(prefix, suffix)} is equivalent to
* {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}, but
* can be further set up in a flexible way, for example, specifying the
* maximal allowed length of the resulting {@code String}.
*
* <p>
* If a length limit is specified for the collector, the prefix length and
* the suffix length are also counted towards this limit. If the combined
* length of the prefix and the suffix exceeds the limit, the resulting
* collector will not accumulate any elements and will always produce the
* same output. For example,
* {@code stream.collect(Joining.with(",").wrap("prefix", "suffix").maxChars(9))}
* will produce {@code "prefixsuf"} string regardless of the input stream
* content.
*
* <p>
* You may wrap several times:
* {@code Joining.with(",").wrap("[", "]").wrap("(", ")")} is equivalent to
* {@code Joining.with(",").wrap("([", "])")}.
*
* @param prefix
* the sequence of characters to be used at the beginning of the
* joined result
* @param suffix
* the sequence of characters to be used at the end of the joined
* result
* @return a new {@code Collector} which wraps the result with the specified
* prefix and suffix.
*/
public Joining wrap(CharSequence prefix, CharSequence suffix) {
    // New wrappers go outside of the existing ones: the new prefix is placed
    // before the current prefix and the new suffix after the current suffix.
    String combinedPrefix = prefix.toString().concat(this.prefix);
    String combinedSuffix = this.suffix.concat(suffix.toString());
    return new Joining(delimiter, ellipsis, combinedPrefix, combinedSuffix, cutStrategy, lenStrategy, maxLength);
}

/**
* Returns a {@code Collector} which behaves like this collector, but uses
* the specified ellipsis {@code CharSequence} instead of default
* {@code "..."} when the string limit (if specified) is reached.
*
* @param ellipsis
* the sequence of characters to be used at the end of the joined
* result to designate that not all of the input elements are
* joined due to the specified string length restriction.
* @return a new {@code Collector} which will use the specified ellipsis
* instead of current setting.
*/
public Joining ellipsis(CharSequence ellipsis) {
    // The parameter shadows the field of the same name; only the converted
    // parameter value is passed on, all other settings are kept.
    String replacement = ellipsis.toString();
    return new Joining(delimiter, replacement, prefix, suffix, cutStrategy, lenStrategy, maxLength);
}

/**
* Returns a {@code Collector} which behaves like this collector, but sets
* the maximal length of the resulting string to the specified number of
* UTF-16 characters (or Unicode code units). This setting overwrites any
* limit previously set by {@link #maxChars(int)},
* {@link #maxCodePoints(int)} or {@link #maxGraphemes(int)} call.
*
* <p>
* The {@code String} produced by the resulting collector is guaranteed to
* have {@link String#length() length} which does not exceed the specified
* limit. An ellipsis sequence (by default {@code "..."}) is used to
* indicate that the limit was reached. Use
* {@link #ellipsis(CharSequence)} to set a custom ellipsis sequence.
*
* <p>
* The collector returned by this method is <a
* href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
* it may not process all the input elements if the limit is reached.
*
* @param limit
* the maximal number of UTF-16 characters in the resulting
* String.
* @return a new {@code Collector} which will produce String no longer than
* given limit.
*/
public Joining maxChars(int limit) {
    // Select the length strategy explicitly, then delegate to withLimit.
    final int strategy = LENGTH_CHARS;
    return withLimit(strategy, limit);
}

/**
* Returns a {@code Collector} which behaves like this collector, but sets
* the maximal number of Unicode code points of the resulting string. This
* setting overwrites any limit previously set by {@link #maxChars(int)},
* {@link #maxCodePoints(int)} or {@link #maxGraphemes(int)} call.
*
* <p>
* The {@code String} produced by the resulting collector is guaranteed to
* have no more code points than the specified limit. An ellipsis sequence
* (by default {@code "..."}) is used to indicate that the limit was
* reached. Use {@link #ellipsis(CharSequence)} to set a custom ellipsis
* sequence.
*
* <p>
* The collector returned by this method is <a
* href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
* it may not process all the input elements if the limit is reached.
*
* @param limit
* the maximal number of code points in the resulting String.
* @return a new {@code Collector} which will produce String no longer than
* given limit.
*/
public Joining maxCodePoints(int limit) {
    // Select the length strategy explicitly, then delegate to withLimit.
    final int strategy = LENGTH_CODEPOINTS;
    return withLimit(strategy, limit);
}

public Joining maxSymbols(int limit) {
return withLimit(LENGTH_SYMBOLS, limit);
/**
* Returns a {@code Collector} which behaves like this collector, but sets
* the maximal number of grapheme clusters. This setting overwrites any
* limit previously set by {@link #maxChars(int)},
* {@link #maxCodePoints(int)} or {@link #maxGraphemes(int)} call.
*
* <p>
* The grapheme cluster is defined in the <a
* href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">Unicode
* Text Segmentation</a> technical report. Basically, a base character and
* the combining characters that follow it are counted as a single unit.
* The {@code String} produced by the resulting collector is guaranteed to
* have no more grapheme clusters than the specified limit. An ellipsis
* sequence (by default {@code "..."}) is used to indicate that the limit
* was reached. Use {@link #ellipsis(CharSequence)} to set a custom ellipsis
* sequence.
*
* <p>
* The collector returned by this method is <a
* href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
* it may not process all the input elements if the limit is reached.
*
* @param limit
* the maximal number of grapheme clusters in the resulting
* String.
* @return a new {@code Collector} which will produce String no longer than
* given limit.
*/
public Joining maxGraphemes(int limit) {
return withLimit(LENGTH_GRAPHEMES, limit);
}

/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string at any point when limit is reached.
*
* <p>
* If the limit is reached, the resulting collector will produce a
* {@code String} whose length is exactly equal to the specified limit. If
* used with {@link #maxChars(int)}, the resulting string may be cut in the
* middle of a surrogate pair.
*
* @return a new {@code Collector} which cuts the resulting string at any
* point when limit is reached.
*/
public Joining cutAnywhere() {
    // Select the cut strategy explicitly, then delegate to withCut.
    final int strategy = CUT_ANYWHERE;
    return withCut(strategy);
}

public Joining cutAfterCodePoint() {
/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string between any code points when limit is reached.
*
* <p>
* The resulting collector will not split a surrogate pair when used with
* {@link #maxChars(int)} or {@link #maxCodePoints(int)}. However, it may
* remove a combining character, which may result in incorrect rendering of
* the last displayed grapheme.
*
* @return a new {@code Collector} which cuts the resulting string between
* code points.
*/
public Joining cutAtCodePoint() {
return withCut(CUT_CODEPOINT);
}

public Joining cutAfterSymbol() {
return withCut(CUT_SYMBOL);
/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string at grapheme cluster boundary when limit is reached.
* This is the default behavior.
*
* <p>
* The grapheme cluster is defined in the <a
* href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">Unicode
* Text Segmentation</a> technical report. Thus the resulting collector will
* not split a surrogate pair and will either preserve all combining
* characters of a grapheme or remove them together with the base character.
*
* @return a new {@code Collector} which cuts the resulting string at
* grapheme cluster boundary.
*/
public Joining cutAtGrapheme() {
return withCut(CUT_GRAPHEME);
}

public Joining cutAfterWord() {
/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string at word boundary when limit is reached.
*
* <p>
* The beginning and the end of every input stream element or delimiter are
* always considered word boundaries, so the stream of
* {@code "one", "two three"} collected with
* {@code Joining.with("").maxChars(n).ellipsis("").cutAtWord()} may
* produce the following strings depending on {@code n}:
*
* <pre>{@code
* ""
* "one"
* "onetwo"
* "onetwo "
* "onetwo three"
* }</pre>
*
* @return a new {@code Collector} which cuts the resulting string at word
* boundary.
*/
public Joining cutAtWord() {
return withCut(CUT_WORD);
}

/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string before the delimiter when limit is reached.
*
* @return a new {@code Collector} which cuts the resulting string before
* the delimiter.
*/
public Joining cutBeforeDelimiter() {
    // Select the cut strategy explicitly, then delegate to withCut.
    final int strategy = CUT_BEFORE_DELIMITER;
    return withCut(strategy);
}

/**
* Returns a {@code Collector} which behaves like this collector, but cuts
* the resulting string after the delimiter when limit is reached.
*
* @return a new {@code Collector} which cuts the resulting string after
* the delimiter.
*/
public Joining cutAfterDelimiter() {
    // Select the cut strategy explicitly, then delegate to withCut.
    final int strategy = CUT_AFTER_DELIMITER;
    return withCut(strategy);
}
Expand Down Expand Up @@ -332,7 +548,7 @@ public BinaryOperator<Accumulator> combiner() {
return acc1;
};
}

@Override
public Function<Accumulator, String> finisher() {
if (maxLength == -1) {
Expand Down
Loading

0 comments on commit 8d10b3a

Please sign in to comment.