Parse multiline string literals and all multiline strings with text on

the first line
This commit is contained in:
moandji.ezana 2014-08-22 23:59:02 +02:00
parent 1f290c94b8
commit ccca314649
9 changed files with 154 additions and 10 deletions

View file

@ -1,5 +1,5 @@
NEXT
Multiline strings.
Multiline and literal strings.
Pass TOML validator (https://github.com/BurntSushi/toml-test), which uncovered many bugs.
Key names can contain dots.
Reduced visibility of internal classes, so that only Toml class is visible to users.

View file

@ -0,0 +1,47 @@
package com.moandjiezana.toml;
import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
import static com.moandjiezana.toml.ValueConverterUtils.parse;
import static com.moandjiezana.toml.ValueConverterUtils.parser;
import java.util.List;
import org.parboiled.errors.ParseError;
import org.parboiled.parserunners.RecoveringParseRunner;
import org.parboiled.support.ParseTreeUtils;
import org.parboiled.support.ParsingResult;
class MultilineLiteralStringConverter implements ValueConverter {
public static void main(String[] args) {
ParsingResult<List<java.lang.String>> parsingResult = new RecoveringParseRunner<List<String>>(ValueConverterUtils.parser().MultilineLiteralString()).run("'''abc''' # comment");
if (parsingResult.hasErrors()) {
for (ParseError parseError : parsingResult.parseErrors) {
System.out.println(parseError.getInputBuffer().extract(0, 1000));
}
}
System.out.println(ParseTreeUtils.printNodeTree(parsingResult));
}
static final MultilineLiteralStringConverter MULTILINE_LITERAL_STRING_CONVERTER = new MultilineLiteralStringConverter();
@Override
public boolean canConvert(String s) {
return s.startsWith("'''");
}
@Override
public Object convert(String s) {
List<String> result = parse(parser().MultilineLiteralString(), s);
if (result == null) {
return INVALID;
}
return result.get(0);
}
private MultilineLiteralStringConverter() {}
}

View file

@ -8,6 +8,7 @@ import java.util.List;
import java.util.regex.Pattern;
class TomlParser {
private static final String STRING_LITERAL_DELIMITER = "'''";
private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)");
private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]");
private static final ValueConverters VALUE_ANALYSIS = new ValueConverters();
@ -29,7 +30,7 @@ class TomlParser {
for (int i = 0; i < lines.length; i++) {
String line = lines[i];
if (line != null && multiline != Multiline.STRING) {
if (line != null && multiline.isTrimmable()) {
line = line.trim();
}
@ -87,10 +88,30 @@ class TomlParser {
pair[1] = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length());
} else {
if (multilineBuilder.toString().trim().length() > 3) {
multilineBuilder.append('\n');
}
continue;
}
}
if (multiline.isNotMultiline() && pair[1].trim().startsWith(STRING_LITERAL_DELIMITER)) {
multiline = Multiline.STRING_LITERAL;
multilineBuilder.append(pair[1]);
key = pair[0].trim();
if (pair[1].trim().indexOf(STRING_LITERAL_DELIMITER, 3) > -1) {
multiline = Multiline.NONE;
pair[1] = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length());
} else {
if (multilineBuilder.toString().trim().length() > 3) {
multilineBuilder.append('\n');
}
continue;
}
}
if (multiline == Multiline.ARRAY) {
String lineWithoutComment = removeComment(line);
multilineBuilder.append(lineWithoutComment);
@ -111,6 +132,16 @@ class TomlParser {
multilineBuilder.append('\n');
continue;
}
} else if (multiline == Multiline.STRING_LITERAL) {
multilineBuilder.append(line);
if (line.contains(STRING_LITERAL_DELIMITER)) {
multiline = Multiline.NONE;
value = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length());
} else {
multilineBuilder.append('\n');
continue;
}
} else {
key = pair[0].trim();
value = pair[1].trim();
@ -203,10 +234,14 @@ class TomlParser {
}
private static enum Multiline {
NONE, ARRAY, STRING;
NONE, ARRAY, STRING, STRING_LITERAL;
/** Returns true only for the {@code NONE} constant. */
public boolean isNotMultiline() {
  return NONE.equals(this);
}
/**
 * Returns true for {@code NONE} and {@code ARRAY}, false for every other constant.
 * The parse loop consults this before trimming the current line.
 */
public boolean isTrimmable() {
  switch (this) {
    case NONE:
    case ARRAY:
      return true;
    default:
      return false;
  }
}
}
}

View file

@ -6,6 +6,7 @@ import static com.moandjiezana.toml.DateConverter.DATE_PARSER;
import static com.moandjiezana.toml.FloatConverter.FLOAT_PARSER;
import static com.moandjiezana.toml.IntegerConverter.INTEGER_PARSER;
import static com.moandjiezana.toml.LiteralStringConverter.LITERAL_STRING_PARSER;
import static com.moandjiezana.toml.MultilineLiteralStringConverter.MULTILINE_LITERAL_STRING_CONVERTER;
import static com.moandjiezana.toml.MultilineStringConverter.MULTILINE_STRING_PARSER;
import static com.moandjiezana.toml.StringConverter.STRING_PARSER;
import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
@ -13,7 +14,7 @@ import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
class ValueConverters {
private static final ValueConverter[] PARSERS = {
MULTILINE_STRING_PARSER, LITERAL_STRING_PARSER, STRING_PARSER, DATE_PARSER, INTEGER_PARSER, FLOAT_PARSER, BOOLEAN_PARSER, ARRAY_PARSER
MULTILINE_STRING_PARSER, MULTILINE_LITERAL_STRING_CONVERTER, LITERAL_STRING_PARSER, STRING_PARSER, DATE_PARSER, INTEGER_PARSER, FLOAT_PARSER, BOOLEAN_PARSER, ARRAY_PARSER
};
public Object convert(String value) {

View file

@ -6,9 +6,22 @@ import java.util.List;
import org.parboiled.BaseParser;
import org.parboiled.Rule;
import org.parboiled.annotations.BuildParseTree;
import org.parboiled.parserunners.RecoveringParseRunner;
import org.parboiled.support.ParseTreeUtils;
import org.parboiled.support.ParsingResult;
@BuildParseTree
class ValueParser extends BaseParser<List<Object>> {
/**
 * Debugging aid: runs the {@code T} rule against a fixed sample and prints the resulting
 * parse tree to standard output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  RecoveringParseRunner<Object> runner = new RecoveringParseRunner<Object>(ValueConverterUtils.parser().T());
  ParsingResult<Object> outcome = runner.run("'''abc''' # comment");
  String tree = ParseTreeUtils.printNodeTree(outcome);
  System.out.println(tree);
}
/**
 * Rule run by this class's {@code main} method.
 *
 * <p>Matches an opening {@code '''}, then one or more characters where each character is only
 * consumed if a {@code '''} does not start at that position, then the closing {@code '''},
 * followed by the {@code Comment()} rule. This rule contains no actions, so it pushes nothing.
 */
public Rule T() {
return Sequence("'''", OneOrMore(TestNot("'''"), ANY), "'''", Comment());
}
public Rule Array() {
return FirstOf(EmptyArray(), Sequence('[', startList(), OneOrMore(FirstOf(NonEmptyArray(), ' ', ',')), ']', endList()));
@ -23,7 +36,11 @@ class ValueParser extends BaseParser<List<Object>> {
}
public Rule LiteralString() {
return FirstOf(Sequence('\'', '\'', startList(), pushToken(""), endList()), Sequence('\'', OneOrMore(TestNot("'"), ANY), startList(), pushToken(match()) , '\'', endList(), Comment()));
return FirstOf(EmptyLiteralString(), Sequence('\'', OneOrMore(TestNot("'"), ANY), startList(), pushToken(match()) , '\'', endList(), Comment()));
}
/**
 * Rule for a value delimited by {@code '''}.
 *
 * <p>{@code EmptyMultilineLiteralString()} is tried first because the non-empty alternative uses
 * {@code OneOrMore} and therefore needs at least one character between the delimiters.
 *
 * <p>The non-empty alternative matches the opening {@code '''}, calls {@code startList()},
 * consumes characters until a {@code '''} starts at the current position, passes the consumed
 * text to {@code pushToken(match())}, matches the closing {@code '''}, calls {@code endList()}
 * and finally applies {@code Comment()}.
 * NOTE(review): presumably {@code Comment()} accepts optional trailing whitespace and a
 * {@code #} comment; its definition is not shown here, so confirm.
 */
public Rule MultilineLiteralString() {
return FirstOf(EmptyMultilineLiteralString(), Sequence("'''", startList(), Sequence(OneOrMore(TestNot("'''"), ANY), pushToken(match())), "'''", endList(), Comment()));
}
public Rule Boolean() {
@ -42,6 +59,14 @@ class ValueParser extends BaseParser<List<Object>> {
return Sequence(Sequence('"', ZeroOrMore(Sequence(TestNot('"'), ANY)), '"'), pushToken(match()));
}
/**
 * Rule for an empty literal string: two consecutive single quotes.
 *
 * <p>After matching the quotes it calls {@code startList()}, pushes the empty string with
 * {@code pushToken("")} and calls {@code endList()}.
 * NOTE(review): unlike {@code EmptyMultilineLiteralString()}, this rule does not end with
 * {@code Comment()}; confirm whether that difference is intended.
 */
Rule EmptyLiteralString() {
return Sequence('\'', '\'', startList(), pushToken(""), endList());
}
/**
 * Rule for an empty multiline literal string: an opening {@code '''} immediately followed by a
 * closing {@code '''}.
 *
 * <p>After matching both delimiters it calls {@code startList()}, pushes the empty string with
 * {@code pushToken("")}, calls {@code endList()} and then applies {@code Comment()}.
 */
Rule EmptyMultilineLiteralString() {
return Sequence("'''", "'''", startList(), pushToken(""), endList(), Comment());
}
Rule EmptyArray() {
return Sequence('[', ']', startList(), endList());
}

View file

@ -64,7 +64,16 @@ public class TomlTest {
assertEquals("Tom \"Dubs\" Preston-Werner", toml.getString("quoted"));
assertEquals("<\\i\\c*\\s*>", toml.getString("regex"));
}
/**
 * Parses the {@code should_get_multiline_literal_string} fixture and checks that
 * {@code empty_line} is empty and that each {@code '''} value equals its {@code _ref}
 * counterpart from the same file.
 */
@Test
public void should_get_multiline_literal_string() throws Exception {
  Toml toml = new Toml().parse(file("should_get_multiline_literal_string"));

  String emptyLine = toml.getString("empty_line");
  assertTrue(emptyLine.isEmpty());

  // Each pair is {reference key, literal-string key}; both values must be equal.
  String[][] referenceAndLiteralKeys = {{"regex2_ref", "regex2"}, {"lines_ref", "lines"}};
  for (String[] keys : referenceAndLiteralKeys) {
    assertEquals(toml.getString(keys[0]), toml.getString(keys[1]));
  }
}
@Test
public void should_get_number() throws Exception {
Toml toml = new Toml().parse("b = 1001");
@ -316,6 +325,10 @@ public class TomlTest {
assertEquals(cal.getTime(), toml.getDate("d"));
assertThat(toml.getList("e", String.class), Matchers.contains("a", "b"));
assertTrue(toml.getBoolean("f"));
assertEquals("abc", toml.getString("g"));
assertEquals("abc", toml.getString("h"));
assertEquals("abc\nabc", toml.getString("i"));
assertEquals("abc\nabc", toml.getString("j"));
}
@Test
@ -428,7 +441,12 @@ public class TomlTest {
public void should_fail_on_invalid_literal_string() {
new Toml().parse("a = ' ' jdkf");
}
/** Text after the closing delimiter of a multiline string must make parsing fail. */
@Test(expected = IllegalStateException.class)
public void should_fail_on_invalid_multiline_string() {
  String tomlWithTrailingText = "a = \"\"\" \"\"\" jdkf";
  new Toml().parse(tomlWithTrailingText);
}
/**
 * Resolves a TOML fixture through {@code getClass().getResource(...)}.
 *
 * @param file fixture name without the {@code .toml} extension
 * @return the fixture as a {@link File}
 * @throws IllegalArgumentException if the resource does not exist or its URL cannot be
 *     converted to a file path
 */
private File file(String file) {
  String resourceName = file + ".toml";
  java.net.URL resource = getClass().getResource(resourceName);
  if (resource == null) {
    // Fail with the missing name instead of an anonymous NullPointerException.
    throw new IllegalArgumentException("Test resource not found: " + resourceName);
  }
  try {
    // URL.getFile() keeps percent-escapes (e.g. %20 for a space), which yields a path that does
    // not exist when the checkout directory contains such characters. Going through the URI
    // decodes them.
    return new File(resource.toURI());
  } catch (java.net.URISyntaxException e) {
    throw new IllegalArgumentException("Invalid test resource URL: " + resource, e);
  }
}

View file

@ -3,4 +3,10 @@ b = 1.1 # comment
c = "abc" # comment
d = 2014-08-04T13:47:00Z # comment
e = ["a", "b"] # comment
f = true # comment
f = true # comment
g = """abc""" # comment
h = '''abc''' # comment
i = """abc
abc""" # comment
j = '''abc
abc''' # comment

View file

@ -1,5 +1,5 @@
ref = "One\nTwo"
one_line = """One\nTwo"""
one_line = """One\nTwo""" # comment
many_lines = """
One
Two"""
Two""" # comment

View file

@ -0,0 +1,12 @@
regex2_ref = "I [dw]on't need \\d{2} apples"
regex2 = '''I [dw]on't need \d{2} apples'''
lines_ref = "The first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n"
lines = '''
The first newline is
trimmed in raw strings.
All other whitespace
is preserved.
'''
empty_line = ''''''