Improved handling of invalid integers.

Type parsers broken out into separate package
This commit is contained in:
moandji.ezana 2014-08-12 17:29:56 +02:00
parent 010bc88a20
commit e2a085d310
8 changed files with 266 additions and 219 deletions

View file

@ -14,6 +14,7 @@ import com.moandjiezana.toml.values.ValueAnalysis;
public class TomlParser {
private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)");
private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]");
private static final ValueAnalysis VALUE_ANALYSIS = new ValueAnalysis();
private final Results results = new Results();
@ -103,9 +104,7 @@ public class TomlParser {
continue;
}
ValueAnalysis lineAnalysis = new ValueAnalysis(value);
Object convertedValue = lineAnalysis.getValue();
Object convertedValue = VALUE_ANALYSIS.convert(value);
if (convertedValue != INVALID) {
results.addValue(key, convertedValue);

View file

@ -0,0 +1,73 @@
package com.moandjiezana.toml.values;
import static com.moandjiezana.toml.values.ValueAnalysis.INVALID;
import java.util.ArrayList;
import java.util.List;
import org.parboiled.Parboiled;
import org.parboiled.parserunners.BasicParseRunner;
import org.parboiled.support.ParsingResult;
import com.moandjiezana.toml.StatementParser;
/**
 * Converts TOML array literals (values starting with {@code [}) into nested
 * {@link List}s of converted values.
 *
 * <p>Tokenising is delegated to the parboiled {@code StatementParser.Array()}
 * rule; each string token is then converted through {@link ValueAnalysis}.
 * An array is rejected when any element is invalid or when its elements are
 * not all of compatible types.
 */
class ArrayParser implements ValueParser {

  static final ArrayParser ARRAY_PARSER = new ArrayParser();

  /** Sentinel, compared by identity only, signalling that a (nested) array is invalid. */
  private static final List<Object> INVALID_ARRAY = new ArrayList<Object>();
  private static final ValueAnalysis VALUE_ANALYSIS = new ValueAnalysis();

  private ArrayParser() {}

  /**
   * @param s the trimmed raw value
   * @return {@code true} when the value starts with an opening bracket
   */
  @Override
  public boolean canParse(String s) {
    return s.startsWith("[");
  }

  /**
   * Parses an array literal.
   *
   * @param s the raw value, starting with {@code [}
   * @return the converted list, or {@link ValueAnalysis#INVALID} when the input could not be
   *     tokenised or contains invalid or heterogeneous elements
   */
  @Override
  public Object parse(String s) {
    // A new parser per call, as in the original code.
    StatementParser parser = Parboiled.createParser(StatementParser.class);
    ParsingResult<List<Object>> parsingResult = new BasicParseRunner<List<Object>>(parser.Array()).run(s);

    List<Object> tokens = parsingResult.resultValue;
    if (tokens == null) {
      // The rule left nothing on the value stack: report an invalid value
      // instead of failing with a NullPointerException while iterating.
      return INVALID;
    }

    List<Object> values = convertList(tokens);
    if (values == INVALID_ARRAY) {
      return INVALID;
    }
    return values;
  }

  /**
   * Recursively converts raw tokens: strings become values, lists become nested lists.
   * Tokens of any other type are ignored.
   *
   * @return the converted list, or the {@link #INVALID_ARRAY} sentinel
   */
  private List<Object> convertList(List<Object> tokens) {
    List<Object> nestedList = new ArrayList<Object>();

    for (Object token : tokens) {
      if (token instanceof String) {
        Object converted = VALUE_ANALYSIS.convert(((String) token).trim());
        if (converted == INVALID || !isHomogenousArray(converted, nestedList)) {
          return INVALID_ARRAY;
        }
        nestedList.add(converted);
      } else if (token instanceof List) {
        @SuppressWarnings("unchecked")
        List<Object> convertedList = convertList((List<Object>) token);
        if (convertedList == INVALID_ARRAY || !isHomogenousArray(convertedList, nestedList)) {
          return INVALID_ARRAY;
        }
        nestedList.add(convertedList);
      }
    }

    return nestedList;
  }

  /**
   * Checks {@code o} against the first element already collected: the two classes must be
   * assignable in at least one direction. An empty list accepts anything.
   */
  private boolean isHomogenousArray(Object o, List<?> values) {
    if (values.isEmpty()) {
      return true;
    }
    Class<?> first = values.get(0).getClass();
    Class<?> candidate = o.getClass();
    return first.isAssignableFrom(candidate) || candidate.isAssignableFrom(first);
  }
}

View file

@ -0,0 +1,37 @@
package com.moandjiezana.toml.values;
import static com.moandjiezana.toml.values.ValueAnalysis.INVALID;
import java.text.SimpleDateFormat;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Recognises UTC date-times of the form {@code yyyy-MM-ddTHH:mm:ssZ}, optionally followed by
 * a comment, and converts them to {@link java.util.Date}.
 */
public class DateParser implements ValueParser {

  static final DateParser DATE_PARSER = new DateParser();
  private static final Pattern DATE_REGEX = Pattern.compile("(\\d{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]Z)(.*)");

  private DateParser() {}

  /**
   * @return {@code true} when the whole input is a date-time followed only by whatever
   *     {@code ValueParserUtils.isComment} accepts
   */
  @Override
  public boolean canParse(String s) {
    Matcher dateMatcher = DATE_REGEX.matcher(s);
    if (!dateMatcher.matches()) {
      return false;
    }
    return ValueParserUtils.isComment(dateMatcher.group(2));
  }

  /**
   * Converts the date-time at the start of {@code s}. Expects {@link #canParse(String)} to have
   * accepted the input.
   *
   * @return the parsed date, or {@code INVALID} when strict (non-lenient) parsing rejects the
   *     field values
   */
  @Override
  public Object parse(String s) {
    Matcher dateMatcher = DATE_REGEX.matcher(s);
    dateMatcher.matches();
    String withOffset = dateMatcher.group(1).replace("Z", "+00:00");

    try {
      // The format's Z pattern letter expects an offset without a colon, so drop
      // the colon sitting at index 22 of the rewritten text.
      String timestamp = new StringBuilder(withOffset).deleteCharAt(22).toString();
      SimpleDateFormat strictFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
      strictFormat.setLenient(false);
      return strictFormat.parse(timestamp);
    } catch (Exception e) {
      return INVALID;
    }
  }
}

View file

@ -0,0 +1,28 @@
package com.moandjiezana.toml.values;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Recognises decimal numbers with digits on both sides of the point (optionally negative and
 * optionally followed by a comment) and converts them to {@link Double}.
 */
public class FloatParser implements ValueParser {

  public static final FloatParser FLOAT_PARSER = new FloatParser();
  private static final Pattern FLOAT_REGEX = Pattern.compile("(-?\\d+\\.\\d+)(.*)");

  private FloatParser() {}

  /**
   * @return {@code true} when the input is a float literal followed only by whatever
   *     {@code ValueParserUtils.isComment} accepts
   */
  @Override
  public boolean canParse(String s) {
    Matcher floatMatcher = FLOAT_REGEX.matcher(s);
    if (!floatMatcher.matches()) {
      return false;
    }
    String trailing = floatMatcher.group(2);
    return ValueParserUtils.isComment(trailing);
  }

  /**
   * Converts the float literal at the start of {@code s}. Expects {@link #canParse(String)} to
   * have accepted the input.
   */
  @Override
  public Object parse(String s) {
    Matcher floatMatcher = FLOAT_REGEX.matcher(s);
    floatMatcher.matches();
    String literal = floatMatcher.group(1);
    return Double.valueOf(literal);
  }
}

View file

@ -0,0 +1,28 @@
package com.moandjiezana.toml.values;
import static com.moandjiezana.toml.values.ValueParserUtils.isComment;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Recognises integer literals (optionally negative, optionally followed by a comment) and
 * converts them to {@link Long}.
 */
public class IntegerParser implements ValueParser {

  // At least one digit is required. With "[0-9]*" the digit group could be empty, so inputs
  // such as "", "-" or "# comment" were accepted by canParse and then made parse throw
  // NumberFormatException.
  private static final Pattern INTEGER_REGEX = Pattern.compile("(-?[0-9]+)(.*)");
  static final IntegerParser INTEGER_PARSER = new IntegerParser();

  /**
   * @param s the trimmed raw value
   * @return {@code true} when the input is one or more digits, optionally preceded by a minus
   *     sign, and followed only by whatever {@code isComment} accepts
   */
  @Override
  public boolean canParse(String s) {
    Matcher matcher = INTEGER_REGEX.matcher(s);
    return matcher.matches() && isComment(matcher.group(2));
  }

  /**
   * Converts the integer literal at the start of {@code s}.
   *
   * @param s the raw value, normally already accepted by {@link #canParse(String)}
   * @return the value as a {@link Long}, or {@link ValueAnalysis#INVALID} when the input does
   *     not start with an integer or the literal does not fit in a {@code long}
   */
  @Override
  public Object parse(String s) {
    Matcher matcher = INTEGER_REGEX.matcher(s);
    if (!matcher.matches()) {
      return ValueAnalysis.INVALID;
    }

    try {
      return Long.valueOf(matcher.group(1));
    } catch (NumberFormatException e) {
      // Syntactically an integer but outside the range of long.
      return ValueAnalysis.INVALID;
    }
  }
}

View file

@ -0,0 +1,77 @@
package com.moandjiezana.toml.values;
import static com.moandjiezana.toml.values.ValueAnalysis.INVALID;
import static com.moandjiezana.toml.values.ValueParserUtils.isComment;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Converts double-quoted TOML strings, decoding escape sequences in a single left-to-right
 * pass.
 *
 * <p>Supported escapes: {@code \b}, {@code \f}, {@code \n}, {@code \t}, {@code \r},
 * {@code \"}, {@code \/}, {@code \\} and a backslash followed by {@code u} and exactly four
 * hexadecimal digits. Any other escape makes the value invalid.
 *
 * <p>Decoding in one pass avoids three problems of chained search-and-replace: an escaped
 * backslash followed by a letter (for example backslash, backslash, {@code n}) is no longer
 * mistaken for a newline escape; a closing quote directly after an escaped backslash is
 * recognised as the terminator; and text following a unicode escape is no longer swallowed
 * into the hexadecimal number.
 */
class StringParser implements ValueParser {

  static final StringParser STRING_PARSER = new StringParser();

  private StringParser() {}

  /**
   * @param s the trimmed raw value
   * @return {@code true} when the value starts with a double quote
   */
  @Override
  public boolean canParse(String s) {
    return s.startsWith("\"");
  }

  /**
   * Decodes a quoted string.
   *
   * @param value the raw value, starting with a double quote
   * @return the decoded string, or {@code INVALID} when the string is unterminated, contains
   *     an unsupported or malformed escape, or is followed by something {@code isComment}
   *     rejects
   */
  @Override
  public Object parse(String value) {
    int length = value.length();
    StringBuilder decoded = new StringBuilder(length);

    // Index 0 is the opening quote.
    int i = 1;
    while (i < length) {
      char ch = value.charAt(i);

      if (ch == '"') {
        // Unescaped quote: end of the string; only a comment may follow.
        if (isComment(value.substring(i + 1))) {
          return decoded.toString();
        }
        return INVALID;
      }

      if (ch != '\\') {
        decoded.append(ch);
        i++;
        continue;
      }

      if (i + 1 >= length) {
        // Dangling backslash at the end of the input.
        return INVALID;
      }

      int consumed = appendEscape(value, i + 1, decoded);
      if (consumed < 0) {
        return INVALID;
      }
      // Skip the backslash plus everything the escape consumed.
      i += 1 + consumed;
    }

    // Ran out of input without finding the closing quote.
    return INVALID;
  }

  /**
   * Decodes the escape whose selector character is at {@code index} (just after the backslash)
   * and appends the result to {@code out}.
   *
   * @return the number of characters consumed after the backslash, or {@code -1} when the
   *     escape is not supported or is malformed
   */
  private static int appendEscape(String value, int index, StringBuilder out) {
    switch (value.charAt(index)) {
      case 'b':
        out.append('\b');
        return 1;
      case 'f':
        out.append('\f');
        return 1;
      case 'n':
        out.append('\n');
        return 1;
      case 't':
        out.append('\t');
        return 1;
      case 'r':
        out.append('\r');
        return 1;
      case '"':
        out.append('"');
        return 1;
      case '/':
        out.append('/');
        return 1;
      case '\\':
        out.append('\\');
        return 1;
      case 'u': {
        int codeUnit = parseFourHexDigits(value, index + 1);
        if (codeUnit < 0) {
          return -1;
        }
        out.append((char) codeUnit);
        return 5;
      }
      default:
        return -1;
    }
  }

  /**
   * Reads exactly four hexadecimal digits starting at {@code start}.
   *
   * @return the value in the range 0 to 0xFFFF, or {@code -1} when fewer than four characters
   *     remain or one of them is not a hexadecimal digit
   */
  private static int parseFourHexDigits(String value, int start) {
    int end = start + 4;
    if (end > value.length()) {
      return -1;
    }

    int result = 0;
    for (int i = start; i < end; i++) {
      int digit = Character.digit(value.charAt(i), 16);
      if (digit < 0) {
        return -1;
      }
      result = (result << 4) | digit;
    }
    return result;
  }
}

View file

@ -1,230 +1,30 @@
package com.moandjiezana.toml.values;
import static com.moandjiezana.toml.values.ArrayParser.ARRAY_PARSER;
import static com.moandjiezana.toml.values.BooleanParser.BOOLEAN_PARSER;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.parboiled.Parboiled;
import org.parboiled.parserunners.BasicParseRunner;
import org.parboiled.support.ParsingResult;
import com.moandjiezana.toml.StatementParser;
import static com.moandjiezana.toml.values.DateParser.DATE_PARSER;
import static com.moandjiezana.toml.values.FloatParser.FLOAT_PARSER;
import static com.moandjiezana.toml.values.IntegerParser.INTEGER_PARSER;
import static com.moandjiezana.toml.values.StringParser.STRING_PARSER;
/**
 * Converts a raw TOML value string into a Java object, or {@link #INVALID} when the value is
 * not recognised.
 *
 * NOTE(review): this listing appears to interleave the lines removed and the lines added by
 * the commit (there are two convert signatures and both the legacy helpers and the new
 * *_PARSER delegations) - confirm against the actual file before relying on it.
 */
public class ValueAnalysis {
/** Sentinel returned for values that cannot be converted; compared by identity. */
public static final Object INVALID = new Object();
/** Sentinel list used by convertList to signal an invalid (nested) array; compared by identity. */
private static final List<Object> INVALID_ARRAY = new ArrayList<Object>();
// Each pattern captures the literal in group 1 and whatever follows it in group 2.
private static final Pattern BOOLEAN_REGEX = Pattern.compile("(true|false)(.*)");
private static final Pattern FLOAT_REGEX = Pattern.compile("(-?\\d+\\.\\d+)(.*)");
// "[0-9]*" permits zero digits, so group 1 may be empty or a lone minus sign.
private static final Pattern INTEGER_REGEX = Pattern.compile("(-?[0-9]*)(.*)");
private static final Pattern DATE_REGEX = Pattern.compile("(\\d{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]Z)(.*)");
// Group 1 is greedy: it captures everything after the unicode escape prefix, not just hex digits.
private static final Pattern UNICODE_REGEX = Pattern.compile("\\\\u(.*)");
private final String rawValue;
// Set by the is* checks on success so that convert can read the captured groups.
private Matcher chosenMatcher;
/** Stores the trimmed raw value for a later {@link #getValue()} call. */
public ValueAnalysis(String value) {
this.rawValue = value.trim();
}
/** Converts the value given to the constructor. */
public Object getValue() {
return convert(rawValue);
}
// The method below mixes the old private convert (is* helpers plus chosenMatcher) with the new
// public convert (delegation to the *_PARSER singletons); see the class-level note.
private Object convert(String value) {
if (isString(value)) {
return convertString(value);
} else if (isInteger(value)) {
return Long.valueOf(chosenMatcher.group(1));
} else if (isFloat(value)) {
return Double.valueOf(chosenMatcher.group(1));
public Object convert(String value) {
if (STRING_PARSER.canParse(value)) {
return STRING_PARSER.parse(value);
} else if (INTEGER_PARSER.canParse(value)) {
return INTEGER_PARSER.parse(value);
} else if (FLOAT_PARSER.canParse(value)) {
return FLOAT_PARSER.parse(value);
} else if (BOOLEAN_PARSER.canParse(value)) {
return BOOLEAN_PARSER.parse(value);
} else if (isArray(value)) {
StatementParser parser = Parboiled.createParser(StatementParser.class);
ParsingResult<List<Object>> parsingResult = new BasicParseRunner<List<Object>>(parser.Array()).run(value);
List<Object> tokens = parsingResult.resultValue;
List<Object> values = convertList(tokens);
if (values == INVALID_ARRAY) {
return INVALID;
}
return values;
} else if (isDate(value)) {
String s = chosenMatcher.group(1).replace("Z", "+00:00");
try {
// Drops the colon of the "+00:00" offset so that it matches the format's Z pattern letter.
s = s.substring(0, 22) + s.substring(23);
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
dateFormat.setLenient(false);
return dateFormat.parse(s);
} catch (Exception e) {
return INVALID;
}
} else if (ARRAY_PARSER.canParse(value)) {
return ARRAY_PARSER.parse(value);
} else if (DATE_PARSER.canParse(value)) {
return DATE_PARSER.parse(value);
} else {
return INVALID;
}
}
/** A value is treated as a string when it starts with a double quote. */
private boolean isString(String value) {
return value.startsWith("\"");
}
/** Matches a float followed only by a comment; records the matcher on success. */
private boolean isFloat(String value) {
Matcher matcher = FLOAT_REGEX.matcher(value);
if (matcher.matches() && isComment(matcher.group(2))) {
chosenMatcher = matcher;
return true;
}
return false;
}
/** Matches a date-time; unlike isFloat and isInteger, the trailing text is not checked. */
private boolean isDate(String value) {
Matcher matcher = DATE_REGEX.matcher(value);
if (matcher.matches()) {
chosenMatcher = matcher;
return true;
}
return false;
}
/** Matches an integer followed only by a comment; records the matcher on success. */
private boolean isInteger(String s) {
Matcher matcher = INTEGER_REGEX.matcher(s);
if (matcher.matches() && isComment(matcher.group(2))) {
chosenMatcher = matcher;
return true;
}
return false;
}
/** A value is treated as an array when it starts with an opening bracket. */
private boolean isArray(String s) {
return s.startsWith("[");
}
/**
 * Checks o against the first element of values: their classes must be assignable in at least
 * one direction. An empty list accepts anything.
 */
private boolean isHomogenousArray(Object o, List<?> values) {
return values.isEmpty() || values.get(0).getClass().isAssignableFrom(o.getClass()) || o.getClass().isAssignableFrom(values.get(0).getClass());
}
/** Matches "true" or "false" with any trailing text; records the matcher on success. */
private boolean isBoolean(String s) {
Matcher matcher = BOOLEAN_REGEX.matcher(s);
if (matcher.matches()) {
chosenMatcher = matcher;
return true;
}
return false;
}
/**
 * Returns true for null or empty input, or when the first non-whitespace character is '#'.
 * Returns false when another character comes first or the input is only whitespace.
 */
public static boolean isComment(String line) {
if (line == null || line.isEmpty()) {
return true;
}
char[] chars = line.toCharArray();
for (char c : chars) {
if (Character.isWhitespace(c)) {
continue;
}
// The first non-whitespace character decides the result.
return c == '#';
}
return false;
}
/**
 * Converts a quoted string: locates the closing quote, requires only a comment after it,
 * replaces unicode escapes, rejects unsupported escapes, then replaces the supported ones.
 * Returns INVALID on any failure.
 */
private Object convertString(String value) {
int stringTerminator = -1;
char[] chars = value.toCharArray();
// The terminator is the first quote not directly preceded by a backslash.
for (int i = 1; i < chars.length; i++) {
char ch = chars[i];
if (ch == '"' && chars[i - 1] != '\\') {
stringTerminator = i;
break;
}
}
if (stringTerminator == -1 || !isComment(value.substring(stringTerminator + 1))) {
return INVALID;
}
value = value.substring(1, stringTerminator);
value = replaceUnicodeCharacters(value);
chars = value.toCharArray();
// Validate escapes: a backslash must be followed by one of b f n t r " / or another backslash.
for (int i = 0; i < chars.length - 1; i++) {
char ch = chars[i];
char next = chars[i + 1];
if (ch == '\\' && next == '\\') {
// Skip the second backslash of an escaped backslash pair.
i++;
} else if (ch == '\\' && !(next == 'b' || next == 'f' || next == 'n' || next == 't' || next == 'r' || next == '"' || next == '/' || next == '\\')) {
return INVALID;
}
}
value = replaceSpecialCharacters(value);
return value;
}
/**
 * Recursively converts raw tokens: strings are converted as values, lists as nested lists.
 * Returns the INVALID_ARRAY sentinel when an element is invalid or not homogeneous with the
 * elements already collected. Tokens of any other type are ignored.
 */
private List<Object> convertList(List<Object> tokens) {
ArrayList<Object> nestedList = new ArrayList<Object>();
for (Object token : tokens) {
if (token instanceof String) {
Object converted = convert(((String) token).trim());
if (converted == INVALID) {
return INVALID_ARRAY;
}
if (isHomogenousArray(converted, nestedList)) {
nestedList.add(converted);
} else {
return INVALID_ARRAY;
}
} else if (token instanceof List) {
@SuppressWarnings("unchecked")
List<Object> convertedList = convertList((List<Object>) token);
if (convertedList != INVALID_ARRAY && isHomogenousArray(convertedList, nestedList)) {
nestedList.add(convertedList);
} else {
return INVALID_ARRAY;
}
}
}
return nestedList;
}
/**
 * Replaces each match of UNICODE_REGEX with the character whose code is the hexadecimal
 * number captured in group 1. Integer.parseInt throws NumberFormatException when the
 * captured text is not entirely hexadecimal.
 */
private String replaceUnicodeCharacters(String value) {
Matcher unicodeMatcher = UNICODE_REGEX.matcher(value);
while (unicodeMatcher.find()) {
value = value.replace(unicodeMatcher.group(), new String(Character.toChars(Integer.parseInt(unicodeMatcher.group(1), 16))));
}
return value;
}
/**
 * Replaces the two-character escape sequences with the characters they denote. The
 * replacements are applied one after another over the whole string, in the order listed.
 */
private String replaceSpecialCharacters(String value) {
return value.replace("\\n", "\n")
.replace("\\\"", "\"")
.replace("\\t", "\t")
.replace("\\r", "\r")
.replace("\\\\", "\\")
.replace("\\/", "/")
.replace("\\b", "\b")
.replace("\\f", "\f");
}
}

View file

@ -352,6 +352,11 @@ public class TomlTest {
public void should_fail_when_illegal_characters_after_key() throws Exception {
new Toml().parse("number = 3.14 pi");
}
@Test(expected = IllegalStateException.class)
public void should_fail_when_illegal_characters_after_integer() throws Exception {
  // An integer followed by text that is not a comment must be rejected.
  String integerWithTrailingText = "number = 314 pi";
  Toml toml = new Toml();
  toml.parse(integerWithTrailingText);
}
@Test(expected = IllegalStateException.class)
public void should_fail_on_float_without_leading_0() {