Skip to content

Commit

Permalink
[INLONG-11369][Sort] KV split has error when there is a escape char w…
Browse files Browse the repository at this point in the history
…ithout before & and = in text
  • Loading branch information
baomingyu committed Oct 18, 2024
1 parent 1c21152 commit 79ac582
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,25 @@ public static List<Map<String, String>> splitKv(
*/
int kvState = STATE_KEY;

char lastCh = 0;
char nextCh = 0;
for (int i = 0; i < text.length(); ++i) {
char ch = text.charAt(i);
if ((i + 1) < text.length()) {
nextCh = text.charAt(i + 1);
} else {
nextCh = 0;
}
if (ch == kvDelimiter) {
switch (state) {
// match previous kv delimiter first when there are more than one kvDelimiter
case STATE_KEY:
key = stringBuilder.toString();
stringBuilder.setLength(0);
state = STATE_VALUE;
if (i == 0) {
stringBuilder.append(ch);
} else {
key = stringBuilder.toString();
stringBuilder.setLength(0);
state = STATE_VALUE;
}
break;
case STATE_VALUE:
stringBuilder.append(ch);
Expand All @@ -124,24 +134,19 @@ public static List<Map<String, String>> splitKv(
} else if (ch == entryDelimiter) {
switch (state) {
case STATE_KEY:
key = lastKey;
if (lastValue == null) {
value = ch + stringBuilder.toString();
} else {
value = lastValue + ch + stringBuilder.toString();
}
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
stringBuilder.append(ch);
break;
case STATE_VALUE:
value = stringBuilder.toString();
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
state = STATE_KEY;
if (nextCh == entryDelimiter) {
stringBuilder.append(ch);
} else {
value = stringBuilder.toString();
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
state = STATE_KEY;
}
break;
case STATE_ESCAPING:
stringBuilder.append(ch);
Expand All @@ -154,12 +159,6 @@ public static List<Map<String, String>> splitKv(
} else if (escapeChar != null && ch == escapeChar) {
switch (state) {
case STATE_KEY:
if (lastCh != 0) {
stringBuilder.append(lastCh);
}
kvState = state;
state = STATE_ESCAPING;
break;
case STATE_VALUE:
kvState = state;
state = STATE_ESCAPING;
Expand All @@ -175,12 +174,6 @@ public static List<Map<String, String>> splitKv(
} else if (quoteChar != null && ch == quoteChar) {
switch (state) {
case STATE_KEY:
if (lastCh != 0) {
stringBuilder.append(lastCh);
}
kvState = state;
state = STATE_QUOTING;
break;
case STATE_VALUE:
kvState = state;
state = STATE_QUOTING;
Expand All @@ -196,20 +189,26 @@ public static List<Map<String, String>> splitKv(
} else if (lineDelimiter != null && ch == lineDelimiter) {
switch (state) {
case STATE_KEY:
String remainingKey = stringBuilder.toString();
key = lastKey;
stringBuilder.append(lastValue).append(lastCh);
stringBuilder.setLength(0);
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
value = stringBuilder.toString();
fields.put(key, value);
Map<String, String> copyFields = new HashMap<>();
copyFields.putAll(fields);
lines.add(copyFields);
stringBuilder.setLength(0);
fields.clear();
lastKey = null;
lastValue = null;
stringBuilder.setLength(0);
break;
case STATE_VALUE:
lastKey = null;
lastValue = null;
value = stringBuilder.toString();
fields.put(key, value);
Map<String, String> copyFields = new HashMap<>();
copyFields = new HashMap<>();
copyFields.putAll(fields);
lines.add(copyFields);
stringBuilder.setLength(0);
Expand All @@ -226,14 +225,22 @@ public static List<Map<String, String>> splitKv(
}
} else {
stringBuilder.append(ch);
switch (state) {
case STATE_ESCAPING:
state = kvState;
}
}
lastCh = ch;
}

switch (state) {
case STATE_KEY:
if (lastKey != null && lastValue != null && text != null) {
fields.put(lastKey, lastValue + lastCh);
String remainingKey = stringBuilder.toString();
key = lastKey;
stringBuilder.setLength(0);
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
value = stringBuilder.toString();
fields.put(key, value);
}
lines.add(fields);
return lines;
Expand All @@ -244,14 +251,18 @@ public static List<Map<String, String>> splitKv(
return lines;
case STATE_ESCAPING:
case STATE_QUOTING:
value = stringBuilder.toString();
String oldValue = fields.get(key);
if (value != null && !"".equals(value)
&& oldValue != null && !"".equals(oldValue)) {
fields.put(key, oldValue + value);
} else if (value != null && !"".equals(value)) {
fields.put(key, value);
switch (kvState) {
case STATE_VALUE:
value = stringBuilder.toString();
fields.put(key, value);
case STATE_KEY:
if (lastKey != null) {
value = stringBuilder.toString();
String oldValue = fields.get(key);
fields.put(key, oldValue + entryDelimiter + value);
}
}

lines.add(fields);
return lines;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

import org.apache.inlong.sort.formats.util.StringUtils;

import org.junit.Assert;
import org.junit.Test;

import java.util.List;
import java.util.Map;

import static org.apache.inlong.sort.formats.util.StringUtils.splitKv;
import static org.junit.Assert.assertEquals;

public class StringUtilsTest {
Expand Down Expand Up @@ -55,17 +57,17 @@ public void testSplitKvString() {
'=', '\\', '\'', '\n');
assertEquals("=", map4.get(0).get("name"));
assertEquals("20&&", map4.get(0).get("age"));
assertEquals("=", map4.get(0).get("name1"));
assertEquals("20&&", map4.get(0).get("age1"));
assertEquals("=", map4.get(1).get("name1"));
assertEquals("20&&", map4.get(1).get("age1"));

String kvString5 = "name==&age=20&&\nname1==&age1=20&&&value=aaa&dddd&";
List<Map<String, String>> map5 = StringUtils.splitKv(kvString5, '&',
'=', '\\', '\'', '\n');
assertEquals("=", map5.get(0).get("name"));
assertEquals("20&&", map5.get(0).get("age"));
assertEquals("=", map5.get(0).get("name1"));
assertEquals("20&&", map5.get(0).get("age1"));
assertEquals("aaa&dddd&", map5.get(0).get("value"));
assertEquals("=", map5.get(1).get("name1"));
assertEquals("20&&", map5.get(1).get("age1"));
assertEquals("aaa&dddd&", map5.get(1).get("value"));

String kvString6 = "name==&age=20&&\\";
List<Map<String, String>> map6 = StringUtils.splitKv(kvString6, '&',
Expand Down Expand Up @@ -153,4 +155,13 @@ public void testSplitCsvStringWithMaxFields() {
assertEquals("home", csv1Array4[2][1]);
assertEquals("home", csv1Array4[2][2]);
}

/**
 * Checks that {@code splitKv} copes with an escape character that appears in the
 * middle of a key (not directly before a delimiter) as well as one that precedes
 * an {@code '&'} inside a value.
 *
 * <p>The input is expected to yield exactly three entries; the key containing the
 * escape character is expected to come back as {@code "\nk2"}, i.e. with the
 * backslash removed and the leading newline kept.
 */
@Test
public void testKvScapeCharSplit() {
    String text = "k1=v1&\nk\\2=v2\\&&k3=v3";
    // The last argument is passed as null; presumably it is the quote character
    // (i.e. quoting disabled) -- confirm against the splitKv overload in StringUtils.
    Map<String, String> kvMap = splitKv(text, '&', '=', '\\', null);

    // Separate assertions so a failure reports which expectation broke and, for
    // the size check, the actual number of entries.
    Assert.assertNotNull(kvMap);
    assertEquals(3, kvMap.size());
    Assert.assertNotNull(kvMap.get("k3"));
    Assert.assertNotNull(kvMap.get("\nk2"));
}
}

0 comments on commit 79ac582

Please sign in to comment.