From 3e2d9fad370a18bbc44b86ee9f03161b99bfbe5c Mon Sep 17 00:00:00 2001 From: "moandji.ezana" Date: Wed, 11 Feb 2015 15:54:51 +0200 Subject: [PATCH] Simplified TomlParser --- .../com/moandjiezana/toml/ArrayConverter.java | 30 +-- .../com/moandjiezana/toml/DateConverter.java | 44 +++- .../com/moandjiezana/toml/Identifier.java | 60 +++++ .../toml/IdentifierConverter.java | 33 +++ .../toml/InlineTableConverter.java | 2 +- .../toml/MultilineLiteralStringConverter.java | 9 + .../toml/MultilineStringConverter.java | 10 +- .../java/com/moandjiezana/toml/Results.java | 33 ++- .../com/moandjiezana/toml/TomlParser.java | 240 ++++-------------- .../toml/ValueConverterUtils.java | 12 + .../moandjiezana/toml/ValueConverters.java | 15 +- 11 files changed, 263 insertions(+), 225 deletions(-) create mode 100644 src/main/java/com/moandjiezana/toml/Identifier.java create mode 100644 src/main/java/com/moandjiezana/toml/IdentifierConverter.java diff --git a/src/main/java/com/moandjiezana/toml/ArrayConverter.java b/src/main/java/com/moandjiezana/toml/ArrayConverter.java index 90849bb..770b7e5 100644 --- a/src/main/java/com/moandjiezana/toml/ArrayConverter.java +++ b/src/main/java/com/moandjiezana/toml/ArrayConverter.java @@ -1,6 +1,7 @@ package com.moandjiezana.toml; import static com.moandjiezana.toml.ValueConverterUtils.INVALID; +import static com.moandjiezana.toml.ValueConverters.CONVERTERS; import java.util.ArrayList; import java.util.List; @@ -10,8 +11,6 @@ class ArrayConverter implements ValueConverter { static final ArrayConverter ARRAY_PARSER = new ArrayConverter(); - private static final ValueConverters VALUE_CONVERTERS = new ValueConverters(); - @Override public boolean canConvert(String s) { return s.startsWith("["); @@ -41,36 +40,35 @@ class ArrayConverter implements ValueConverter { @Override public Object convert(String s, AtomicInteger index) { + int startIndex = index.get(); char[] chars = s.toCharArray(); List arrayItems = new ArrayList(); boolean terminated = false; + boolean inComment = false; for (int i = index.incrementAndGet(); i < chars.length; i = index.incrementAndGet()) { char c = chars[i]; - - if (Character.isWhitespace(c)) { + + if (c == '#' && !inComment) { + inComment = true; + } else if (c == '\n') { + inComment = false; + } else if (inComment || Character.isWhitespace(c) || c == ',') { continue; - } - if (c == ',') { - continue; - } - - if (c == '[') { + } else if (c == '[') { arrayItems.add(convert(s, index)); continue; - } - - if (c == ']') { + } else if (c == ']') { terminated = true; break; + } else { + arrayItems.add(CONVERTERS.convert(s, index)); } - - arrayItems.add(VALUE_CONVERTERS.convert(s, index)); } if (!terminated) { - return INVALID; + return ValueConverterUtils.unterminated(s.substring(startIndex, s.length())); } for (Object arrayItem : arrayItems) { diff --git a/src/main/java/com/moandjiezana/toml/DateConverter.java b/src/main/java/com/moandjiezana/toml/DateConverter.java index 6c49ba7..0f79071 100644 --- a/src/main/java/com/moandjiezana/toml/DateConverter.java +++ b/src/main/java/com/moandjiezana/toml/DateConverter.java @@ -15,9 +15,25 @@ class DateConverter implements ValueConverter { @Override public boolean canConvert(String s) { - Matcher matcher = DATE_REGEX.matcher(s); + if (s.length() < 5) { + return false; + } + + char[] chars = s.toCharArray(); - return matcher.matches(); + for (int i = 0; i < 5; i++) { + char c = chars[i]; + + if (i < 4) { + if (!Character.isDigit(c)) { + return false; + } + } else if (c != '-') { + return false; + } + } + + return true; } @Override @@ -55,9 +71,25 @@ class DateConverter implements ValueConverter { @Override public Object convert(String original, AtomicInteger index) { - String s = original.substring(index.get()); + StringBuilder sb = new StringBuilder(); + + for (int i = index.get(); i < original.length(); i = index.incrementAndGet()) { + char c = original.charAt(i); + if (Character.isDigit(c) || c == '-' || c == ':' || c == '.' || c == 'T' || c == 'Z') { + sb.append(c); + } else { + index.decrementAndGet(); + break; + } + } + + String s = sb.toString(); Matcher matcher = DATE_REGEX.matcher(s); - matcher.matches(); + + if (!matcher.matches()) { + return INVALID; + } + String dateString = matcher.group(1); String zone = matcher.group(3); String fractionalSeconds = matcher.group(2); @@ -72,9 +104,7 @@ class DateConverter implements ValueConverter { } else if (zone.contains(":")) { dateString += zone.replace(":", ""); } - - index.addAndGet(matcher.end(3) - 1); - + try { SimpleDateFormat dateFormat = new SimpleDateFormat(format); dateFormat.setLenient(false); diff --git a/src/main/java/com/moandjiezana/toml/Identifier.java b/src/main/java/com/moandjiezana/toml/Identifier.java new file mode 100644 index 0000000..acf44fc --- /dev/null +++ b/src/main/java/com/moandjiezana/toml/Identifier.java @@ -0,0 +1,60 @@ +package com.moandjiezana.toml; + +class Identifier { + + static final Identifier INVALID = new Identifier(""); + + private final String name; + private final Type type; + + Identifier(String name) { + this.name = name; + if (name.startsWith("[[")) { + this.type = Type.TABLE_ARRAY; + } else if (name.startsWith("[")) { + this.type = Type.TABLE; + } else { + this.type = Type.KEY; + } + } + + boolean acceptsNext(char c) { + if (isKey()) { + return c == '='; + } + + return c == '\n' || c == '#'; + } + + String getName() { + return name; + } + + boolean isKey() { + return type == Type.KEY; + } + + boolean isTable() { + return type == Type.TABLE; + } + + boolean isTableArray() { + return type == Type.TABLE_ARRAY; + } + + boolean isValid() { + if (isKey()) { + return Keys.getKey(name) != null; + } + + if (isTable()) { + return Keys.getTableName(name) != null; + } + + return Keys.getTableArrayName(name) != null; + } + + private static enum Type { + KEY, TABLE, TABLE_ARRAY; + } +} diff --git a/src/main/java/com/moandjiezana/toml/IdentifierConverter.java b/src/main/java/com/moandjiezana/toml/IdentifierConverter.java new file mode 100644 index 0000000..44d6aa5 --- /dev/null +++ b/src/main/java/com/moandjiezana/toml/IdentifierConverter.java @@ -0,0 +1,33 @@ +package com.moandjiezana.toml; + +import java.util.concurrent.atomic.AtomicInteger; + +class IdentifierConverter { + + static final IdentifierConverter IDENTIFIER_CONVERTER = new IdentifierConverter(); + + Identifier convert(char[] chars, AtomicInteger index) { + boolean quoted = false; + StringBuilder name = new StringBuilder(); + Identifier identifier = null; + + for (int i = index.get(); i < chars.length; i = index.incrementAndGet()) { + char c = chars[i]; + if (c == '"' && (i == 0 || chars[i - 1] != '\\')) { + quoted = !quoted; + name.append('"'); + } else if (c == '\n' || (!quoted && (c == '#' || c == '='))) { + return new Identifier(name.toString().trim()); + } else if (i == chars.length - 1 && identifier == null) { + name.append(c); + return new Identifier(name.toString().trim()); + } else { + name.append(c); + } + } + + return identifier != null ? identifier : Identifier.INVALID; + } + + private IdentifierConverter() {} +} diff --git a/src/main/java/com/moandjiezana/toml/InlineTableConverter.java b/src/main/java/com/moandjiezana/toml/InlineTableConverter.java index 54c3a26..bcfda23 100644 --- a/src/main/java/com/moandjiezana/toml/InlineTableConverter.java +++ b/src/main/java/com/moandjiezana/toml/InlineTableConverter.java @@ -1,6 +1,7 @@ package com.moandjiezana.toml; import static com.moandjiezana.toml.ValueConverterUtils.INVALID; +import static com.moandjiezana.toml.ValueConverters.CONVERTERS; import java.util.HashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -8,7 +9,6 @@ import java.util.concurrent.atomic.AtomicInteger; class InlineTableConverter implements ValueConverter { static final InlineTableConverter INLINE_TABLE_PARSER = new InlineTableConverter(); - private static final ValueConverters CONVERTERS = new ValueConverters(); @Override public boolean canConvert(String s) { diff --git a/src/main/java/com/moandjiezana/toml/MultilineLiteralStringConverter.java b/src/main/java/com/moandjiezana/toml/MultilineLiteralStringConverter.java index 37323d2..5210727 100644 --- a/src/main/java/com/moandjiezana/toml/MultilineLiteralStringConverter.java +++ b/src/main/java/com/moandjiezana/toml/MultilineLiteralStringConverter.java @@ -29,9 +29,14 @@ class MultilineLiteralStringConverter implements ValueConverter { @Override public Object convert(String s, AtomicInteger index) { char[] chars = s.toCharArray(); + int originalStartIndex = index.get(); int startIndex = index.addAndGet(3); int endIndex = -1; + if (chars[startIndex] == '\n') { + startIndex = index.incrementAndGet(); + } + for (int i = startIndex; i < chars.length; i = index.incrementAndGet()) { char c = chars[i]; @@ -41,6 +46,10 @@ class MultilineLiteralStringConverter implements ValueConverter { break; } } + + if (endIndex == -1) { + return ValueConverterUtils.unterminated(s.substring(originalStartIndex, s.length())); + } return s.substring(startIndex, endIndex); } diff --git a/src/main/java/com/moandjiezana/toml/MultilineStringConverter.java b/src/main/java/com/moandjiezana/toml/MultilineStringConverter.java index 55417ac..631e964 100644 --- a/src/main/java/com/moandjiezana/toml/MultilineStringConverter.java +++ b/src/main/java/com/moandjiezana/toml/MultilineStringConverter.java @@ -2,6 +2,7 @@ package com.moandjiezana.toml; import static com.moandjiezana.toml.ValueConverterUtils.INVALID; import static com.moandjiezana.toml.ValueConverterUtils.isComment; +import static com.moandjiezana.toml.ValueConverterUtils.unterminated; import java.util.concurrent.atomic.AtomicInteger; @@ -29,9 +30,14 @@ class MultilineStringConverter implements ValueConverter { @Override public Object convert(String s, AtomicInteger index) { char[] chars = s.toCharArray(); + int originalStartIndex = index.get(); int startIndex = index.addAndGet(3); int endIndex = -1; - + + if (chars[startIndex] == '\n') { + startIndex = index.incrementAndGet(); + } + for (int i = startIndex; i < chars.length; i = index.incrementAndGet()) { char c = chars[i]; @@ -43,7 +49,7 @@ class MultilineStringConverter implements ValueConverter { } if (endIndex == -1) { - return INVALID; + return unterminated(s.substring(originalStartIndex, s.length())); } s = s.substring(startIndex, endIndex); diff --git a/src/main/java/com/moandjiezana/toml/Results.java b/src/main/java/com/moandjiezana/toml/Results.java index 8b02882..5a64497 100644 --- a/src/main/java/com/moandjiezana/toml/Results.java +++ b/src/main/java/com/moandjiezana/toml/Results.java @@ -50,6 +50,33 @@ class Results { .append('\n'); } + void invalidIdentifier(Identifier identifier, int line) { + if (identifier.isKey()) { + invalidKey(identifier.getName(), line); + } else if (identifier.isTable()) { + invalidTable(identifier.getName(), line); + } else if (identifier.isTableArray()) { + invalidTableArray(identifier.getName(), line); + } + } + + void invalidTextAfterIdentifier(Identifier identifier, char text, int line) { + if (identifier.isKey() && text == '\n') { + sb.append("Key ") + .append(identifier.getName()) + .append(" is not followed by an equals sign on line ") + .append(line) + .append('\n'); + } else { + sb.append("Invalid text after key ") + .append(identifier.getName()) + .append(" on line ") + .append(line) + .append(". Make sure to terminate the value or add a comment (#).") + .append('\n'); + } + } + void invalidKey(String key, int line) { sb.append("Invalid key"); if (line > -1) { @@ -79,13 +106,13 @@ class Results { .append('\n'); } - void unterminated(String key, String multiline, int line) { - sb.append("Unterminated multiline value on line ") + void unterminated(String key, String value, int line) { + sb.append("Unterminated value on line ") .append(line) .append(": ") .append(key) .append(" = ") - .append(multiline.trim()) + .append(value.trim()) .append('\n'); } diff --git a/src/main/java/com/moandjiezana/toml/TomlParser.java b/src/main/java/com/moandjiezana/toml/TomlParser.java index 2244a85..eeb3bc9 100644 --- a/src/main/java/com/moandjiezana/toml/TomlParser.java +++ b/src/main/java/com/moandjiezana/toml/TomlParser.java @@ -1,203 +1,73 @@ package com.moandjiezana.toml; +import static com.moandjiezana.toml.IdentifierConverter.IDENTIFIER_CONVERTER; import static com.moandjiezana.toml.ValueConverterUtils.INVALID; -import static com.moandjiezana.toml.ValueConverterUtils.isComment; -import java.util.regex.Pattern; +import java.util.concurrent.atomic.AtomicInteger; + +import com.moandjiezana.toml.ValueConverterUtils.Unterminated; class TomlParser { - private static final String STRING_LITERAL_DELIMITER = "'''"; - private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)"); - private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]"); - private static final ValueConverters VALUE_ANALYSIS = new ValueConverters(); - - private final Results results = new Results(); Results run(String tomlString) { + final Results results = new Results(); + if (tomlString.isEmpty()) { return results; } - - String[] lines = tomlString.split("[\\n\\r]"); - int lastKeyLine = 1; - StringBuilder multilineBuilder = new StringBuilder(); - Multiline multiline = Multiline.NONE; - String key = null; - String value = null; - - for (int i = 0; i < lines.length; i++) { - String line = lines[i]; - - if (line != null && multiline.isTrimmable()) { - line = line.trim(); - } - - if (isComment(line) || line.isEmpty()) { - continue; - } - - // TODO check that this works in multiline context - if (isTableArray(line)) { - String tableName = Keys.getTableArrayName(line); - if (tableName != null) { - results.startTableArray(tableName); - } else { - results.errors.invalidTableArray(line, i + 1); - } - - continue; - } - - if (multiline.isNotMultiline() && isTable(line)) { - String tableName = Keys.getTableName(line); - if (tableName != null) { - results.startTables(tableName); - } else { - results.errors.invalidTable(line.trim(), i + 1); - } - - continue; - } - - if (multiline.isNotMultiline() && !line.contains("=")) { - results.errors.invalidKey(line, i + 1); - continue; - } - - String[] pair = line.split("=", 2); - - if (multiline.isNotMultiline() && MULTILINE_ARRAY_REGEX.matcher(pair[1].trim()).matches()) { - multiline = Multiline.ARRAY; - key = pair[0].trim(); - multilineBuilder.append(removeComment(pair[1])); - continue; - } - - if (multiline.isNotMultiline() && pair[1].trim().startsWith("\"\"\"")) { - multiline = Multiline.STRING; - multilineBuilder.append(pair[1]); - key = pair[0].trim(); - - if (pair[1].trim().indexOf("\"\"\"", 3) > -1) { - multiline = Multiline.NONE; - pair[1] = multilineBuilder.toString().trim(); - multilineBuilder.delete(0, multilineBuilder.length()); - } else { - if (multilineBuilder.toString().trim().length() > 3) { - multilineBuilder.append('\n'); - } - continue; - } - } - - if (multiline.isNotMultiline() && pair[1].trim().startsWith(STRING_LITERAL_DELIMITER)) { - multiline = Multiline.STRING_LITERAL; - multilineBuilder.append(pair[1]); - key = pair[0].trim(); - - if (pair[1].trim().indexOf(STRING_LITERAL_DELIMITER, 3) > -1) { - multiline = Multiline.NONE; - pair[1] = multilineBuilder.toString().trim(); - multilineBuilder.delete(0, multilineBuilder.length()); - } else { - if (multilineBuilder.toString().trim().length() > 3) { - multilineBuilder.append('\n'); - } - continue; - } - } - - if (multiline == Multiline.ARRAY) { - String lineWithoutComment = removeComment(line); - multilineBuilder.append(lineWithoutComment); - if (MULTILINE_ARRAY_REGEX_END.matcher(lineWithoutComment).matches()) { - multiline = Multiline.NONE; - value = multilineBuilder.toString(); - multilineBuilder.delete(0, multilineBuilder.length()); - } else { - continue; - } - } else if (multiline == Multiline.STRING) { - multilineBuilder.append(line); - if (line.contains("\"\"\"")) { - multiline = Multiline.NONE; - value = multilineBuilder.toString().trim(); - multilineBuilder.delete(0, multilineBuilder.length()); - } else { - multilineBuilder.append('\n'); - continue; - } - } else if (multiline == Multiline.STRING_LITERAL) { - multilineBuilder.append(line); - if (line.contains(STRING_LITERAL_DELIMITER)) { - multiline = Multiline.NONE; - value = multilineBuilder.toString().trim(); - multilineBuilder.delete(0, multilineBuilder.length()); - } else { - multilineBuilder.append('\n'); - continue; - } - } else { - key = Keys.getKey(pair[0]); - if (key == null) { - results.errors.invalidKey(pair[0], i + 1); - continue; - } - value = pair[1].trim(); - } - - lastKeyLine = i + 1; - Object convertedValue = VALUE_ANALYSIS.convert(value); - - if (convertedValue != INVALID) { - results.addValue(key, convertedValue); - } else { - results.errors.invalidValue(key, value, i + 1); - } - } + char[] chars = tomlString.toCharArray(); + AtomicInteger index = new AtomicInteger(); + boolean inComment = false; + AtomicInteger line = new AtomicInteger(1); + Identifier identifier = null; + Object value = null; - if (multiline != Multiline.NONE) { - results.errors.unterminated(key, multilineBuilder.toString().trim(), lastKeyLine); + for (int i = index.get(); i < chars.length; i = index.incrementAndGet()) { + char c = chars[i]; + + if (c == '#' && !inComment) { + inComment = true; + } else if (!Character.isWhitespace(c) && !inComment && identifier == null) { + Identifier id = IDENTIFIER_CONVERTER.convert(chars, index); + + if (id.isValid()) { + char next = chars[index.get()]; + if (index.get() < chars.length -1 && !id.acceptsNext(next)) { + results.errors.invalidTextAfterIdentifier(id, next, line.get()); + } else if (id.isKey()) { + identifier = id; + } else if (id.isTable()) { + results.startTables(Keys.getTableName(id.getName())); + } else if (id.isTableArray()) { + results.startTableArray(Keys.getTableArrayName(id.getName())); + } + inComment = next == '#'; + } else { + results.errors.invalidIdentifier(id, line.get()); + } + } else if (c == '\n') { + inComment = false; + identifier = null; + value = null; + line.incrementAndGet(); + } else if (!inComment && identifier != null && identifier.isKey() && value == null && !Character.isWhitespace(c)) { + int startIndex = index.get(); + Object converted = ValueConverters.CONVERTERS.convert(tomlString, index); + value = converted; + + if (converted == INVALID) { + results.errors.invalidValue(identifier.getName(), tomlString.substring(startIndex, Math.min(index.get(), tomlString.length() - 1)), line.get()); + } else if (converted instanceof Unterminated) { + results.errors.unterminated(identifier.getName(), ((Unterminated) converted).payload, line.get()); + } else { + results.addValue(identifier.getName(), converted); + } + } else if (value != null && !inComment && !Character.isWhitespace(c)) { + results.errors.invalidTextAfterIdentifier(identifier, c, line.get()); + } } return results; } - - private boolean isTableArray(String line) { - return line.startsWith("[["); - } - - private boolean isTable(String line) { - return line.startsWith("["); - } - - private String removeComment(String line) { - line = line.trim(); - if (line.startsWith("\"")) { - int startOfComment = line.indexOf('#', line.lastIndexOf('"')); - if (startOfComment > -1) { - return line.substring(0, startOfComment - 1).trim(); - } - } else { - int startOfComment = line.indexOf('#'); - if (startOfComment > -1) { - return line.substring(0, startOfComment - 1).trim(); - } - } - - return line; - } - - private static enum Multiline { - NONE, ARRAY, STRING, STRING_LITERAL; - - public boolean isNotMultiline() { - return this == NONE; - } - - public boolean isTrimmable() { - return this == NONE || this == ARRAY; - } - } } diff --git a/src/main/java/com/moandjiezana/toml/ValueConverterUtils.java b/src/main/java/com/moandjiezana/toml/ValueConverterUtils.java index a50efb8..5603d69 100644 --- a/src/main/java/com/moandjiezana/toml/ValueConverterUtils.java +++ b/src/main/java/com/moandjiezana/toml/ValueConverterUtils.java @@ -4,6 +4,18 @@ package com.moandjiezana.toml; class ValueConverterUtils { static final Object INVALID = new Object(); + static Unterminated unterminated(String payload) { + return new Unterminated(payload); + } + + static class Unterminated { + final String payload; + + private Unterminated(String payload) { + this.payload = payload; + } + } + static boolean isComment(String line) { if (line == null || line.isEmpty()) { return true; diff --git a/src/main/java/com/moandjiezana/toml/ValueConverters.java b/src/main/java/com/moandjiezana/toml/ValueConverters.java index c68fe6a..2800188 100644 --- a/src/main/java/com/moandjiezana/toml/ValueConverters.java +++ b/src/main/java/com/moandjiezana/toml/ValueConverters.java @@ -15,21 +15,12 @@ import java.util.concurrent.atomic.AtomicInteger; class ValueConverters { + static final ValueConverters CONVERTERS = new ValueConverters(); + private static final ValueConverter[] PARSERS = { MULTILINE_STRING_PARSER, MULTILINE_LITERAL_STRING_CONVERTER, LITERAL_STRING_PARSER, STRING_PARSER, DATE_PARSER, NUMBER_PARSER, BOOLEAN_PARSER, ARRAY_PARSER, INLINE_TABLE_PARSER }; - Object convert(String value) { - for (ValueConverter valueParser : PARSERS) { - if (valueParser.canConvert(value)) { - return valueParser.convert(value); - } - } - - return INVALID; - } - - Object convert(String value, AtomicInteger index) { String substring = value.substring(index.get()); for (ValueConverter valueParser : PARSERS) { @@ -40,4 +31,6 @@ class ValueConverters { return INVALID; } + + private ValueConverters() {} }