Parse multiline string literals and all multiline strings with text on the first line
This commit is contained in:
moandji.ezana 2014-08-22 23:59:02 +02:00
parent 1f290c94b8
commit ccca314649
9 changed files with 154 additions and 10 deletions

View file

@ -1,5 +1,5 @@
NEXT NEXT
Multiline strings. Multiline and literal strings.
Pass TOML validator (https://github.com/BurntSushi/toml-test), which uncovered many bugs. Pass TOML validator (https://github.com/BurntSushi/toml-test), which uncovered many bugs.
Key names can contain dots. Key names can contain dots.
Reduced visibility of internal classes, so that only Toml class is visible to users. Reduced visibility of internal classes, so that only Toml class is visible to users.

View file

@ -0,0 +1,47 @@
package com.moandjiezana.toml;
import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
import static com.moandjiezana.toml.ValueConverterUtils.parse;
import static com.moandjiezana.toml.ValueConverterUtils.parser;
import java.util.List;
import org.parboiled.errors.ParseError;
import org.parboiled.parserunners.RecoveringParseRunner;
import org.parboiled.support.ParseTreeUtils;
import org.parboiled.support.ParsingResult;
class MultilineLiteralStringConverter implements ValueConverter {
public static void main(String[] args) {
ParsingResult<List<java.lang.String>> parsingResult = new RecoveringParseRunner<List<String>>(ValueConverterUtils.parser().MultilineLiteralString()).run("'''abc''' # comment");
if (parsingResult.hasErrors()) {
for (ParseError parseError : parsingResult.parseErrors) {
System.out.println(parseError.getInputBuffer().extract(0, 1000));
}
}
System.out.println(ParseTreeUtils.printNodeTree(parsingResult));
}
static final MultilineLiteralStringConverter MULTILINE_LITERAL_STRING_CONVERTER = new MultilineLiteralStringConverter();
@Override
public boolean canConvert(String s) {
return s.startsWith("'''");
}
@Override
public Object convert(String s) {
List<String> result = parse(parser().MultilineLiteralString(), s);
if (result == null) {
return INVALID;
}
return result.get(0);
}
private MultilineLiteralStringConverter() {}
}

View file

@ -8,6 +8,7 @@ import java.util.List;
import java.util.regex.Pattern; import java.util.regex.Pattern;
class TomlParser { class TomlParser {
private static final String STRING_LITERAL_DELIMITER = "'''";
private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)"); private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)");
private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]"); private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]");
private static final ValueConverters VALUE_ANALYSIS = new ValueConverters(); private static final ValueConverters VALUE_ANALYSIS = new ValueConverters();
@ -29,7 +30,7 @@ class TomlParser {
for (int i = 0; i < lines.length; i++) { for (int i = 0; i < lines.length; i++) {
String line = lines[i]; String line = lines[i];
if (line != null && multiline != Multiline.STRING) { if (line != null && multiline.isTrimmable()) {
line = line.trim(); line = line.trim();
} }
@ -87,10 +88,30 @@ class TomlParser {
pair[1] = multilineBuilder.toString().trim(); pair[1] = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length()); multilineBuilder.delete(0, multilineBuilder.length());
} else { } else {
if (multilineBuilder.toString().trim().length() > 3) {
multilineBuilder.append('\n');
}
continue; continue;
} }
} }
if (multiline.isNotMultiline() && pair[1].trim().startsWith(STRING_LITERAL_DELIMITER)) {
multiline = Multiline.STRING_LITERAL;
multilineBuilder.append(pair[1]);
key = pair[0].trim();
if (pair[1].trim().indexOf(STRING_LITERAL_DELIMITER, 3) > -1) {
multiline = Multiline.NONE;
pair[1] = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length());
} else {
if (multilineBuilder.toString().trim().length() > 3) {
multilineBuilder.append('\n');
}
continue;
}
}
if (multiline == Multiline.ARRAY) { if (multiline == Multiline.ARRAY) {
String lineWithoutComment = removeComment(line); String lineWithoutComment = removeComment(line);
multilineBuilder.append(lineWithoutComment); multilineBuilder.append(lineWithoutComment);
@ -111,6 +132,16 @@ class TomlParser {
multilineBuilder.append('\n'); multilineBuilder.append('\n');
continue; continue;
} }
} else if (multiline == Multiline.STRING_LITERAL) {
multilineBuilder.append(line);
if (line.contains(STRING_LITERAL_DELIMITER)) {
multiline = Multiline.NONE;
value = multilineBuilder.toString().trim();
multilineBuilder.delete(0, multilineBuilder.length());
} else {
multilineBuilder.append('\n');
continue;
}
} else { } else {
key = pair[0].trim(); key = pair[0].trim();
value = pair[1].trim(); value = pair[1].trim();
@ -203,10 +234,14 @@ class TomlParser {
} }
private static enum Multiline { private static enum Multiline {
NONE, ARRAY, STRING; NONE, ARRAY, STRING, STRING_LITERAL;
public boolean isNotMultiline() { public boolean isNotMultiline() {
return this == NONE; return this == NONE;
} }
public boolean isTrimmable() {
return this == NONE || this == ARRAY;
}
} }
} }

View file

@ -6,6 +6,7 @@ import static com.moandjiezana.toml.DateConverter.DATE_PARSER;
import static com.moandjiezana.toml.FloatConverter.FLOAT_PARSER; import static com.moandjiezana.toml.FloatConverter.FLOAT_PARSER;
import static com.moandjiezana.toml.IntegerConverter.INTEGER_PARSER; import static com.moandjiezana.toml.IntegerConverter.INTEGER_PARSER;
import static com.moandjiezana.toml.LiteralStringConverter.LITERAL_STRING_PARSER; import static com.moandjiezana.toml.LiteralStringConverter.LITERAL_STRING_PARSER;
import static com.moandjiezana.toml.MultilineLiteralStringConverter.MULTILINE_LITERAL_STRING_CONVERTER;
import static com.moandjiezana.toml.MultilineStringConverter.MULTILINE_STRING_PARSER; import static com.moandjiezana.toml.MultilineStringConverter.MULTILINE_STRING_PARSER;
import static com.moandjiezana.toml.StringConverter.STRING_PARSER; import static com.moandjiezana.toml.StringConverter.STRING_PARSER;
import static com.moandjiezana.toml.ValueConverterUtils.INVALID; import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
@ -13,7 +14,7 @@ import static com.moandjiezana.toml.ValueConverterUtils.INVALID;
class ValueConverters { class ValueConverters {
private static final ValueConverter[] PARSERS = { private static final ValueConverter[] PARSERS = {
MULTILINE_STRING_PARSER, LITERAL_STRING_PARSER, STRING_PARSER, DATE_PARSER, INTEGER_PARSER, FLOAT_PARSER, BOOLEAN_PARSER, ARRAY_PARSER MULTILINE_STRING_PARSER, MULTILINE_LITERAL_STRING_CONVERTER, LITERAL_STRING_PARSER, STRING_PARSER, DATE_PARSER, INTEGER_PARSER, FLOAT_PARSER, BOOLEAN_PARSER, ARRAY_PARSER
}; };
public Object convert(String value) { public Object convert(String value) {

View file

@ -6,9 +6,22 @@ import java.util.List;
import org.parboiled.BaseParser; import org.parboiled.BaseParser;
import org.parboiled.Rule; import org.parboiled.Rule;
import org.parboiled.annotations.BuildParseTree; import org.parboiled.annotations.BuildParseTree;
import org.parboiled.parserunners.RecoveringParseRunner;
import org.parboiled.support.ParseTreeUtils;
import org.parboiled.support.ParsingResult;
@BuildParseTree @BuildParseTree
class ValueParser extends BaseParser<List<Object>> { class ValueParser extends BaseParser<List<Object>> {
/**
 * Manual debugging entry point: runs the T() rule with a RecoveringParseRunner
 * against the fixed sample "'''abc''' # comment" and prints the node tree.
 */
public static void main(String[] args) {
ParsingResult<Object> parsingResult = new RecoveringParseRunner<Object>(ValueConverterUtils.parser().T()).run("'''abc''' # comment");
System.out.println(ParseTreeUtils.printNodeTree(parsingResult));
}
/**
 * Matches an opening ''', one or more characters that do not start a ''',
 * the closing ''', and then the Comment() rule. No value is pushed.
 * NOTE(review): in the visible code this rule is only referenced from main(),
 * so it looks like a scratch rule -- confirm whether it is still needed.
 */
public Rule T() {
return Sequence("'''", OneOrMore(TestNot("'''"), ANY), "'''", Comment());
}
public Rule Array() { public Rule Array() {
return FirstOf(EmptyArray(), Sequence('[', startList(), OneOrMore(FirstOf(NonEmptyArray(), ' ', ',')), ']', endList())); return FirstOf(EmptyArray(), Sequence('[', startList(), OneOrMore(FirstOf(NonEmptyArray(), ' ', ',')), ']', endList()));
@ -23,7 +36,11 @@ class ValueParser extends BaseParser<List<Object>> {
} }
public Rule LiteralString() { public Rule LiteralString() {
return FirstOf(Sequence('\'', '\'', startList(), pushToken(""), endList()), Sequence('\'', OneOrMore(TestNot("'"), ANY), startList(), pushToken(match()) , '\'', endList(), Comment())); return FirstOf(EmptyLiteralString(), Sequence('\'', OneOrMore(TestNot("'"), ANY), startList(), pushToken(match()) , '\'', endList(), Comment()));
}
public Rule MultilineLiteralString() {
return FirstOf(EmptyMultilineLiteralString(), Sequence("'''", startList(), Sequence(OneOrMore(TestNot("'''"), ANY), pushToken(match())), "'''", endList(), Comment()));
} }
public Rule Boolean() { public Rule Boolean() {
@ -42,6 +59,14 @@ class ValueParser extends BaseParser<List<Object>> {
return Sequence(Sequence('"', ZeroOrMore(Sequence(TestNot('"'), ANY)), '"'), pushToken(match())); return Sequence(Sequence('"', ZeroOrMore(Sequence(TestNot('"'), ANY)), '"'), pushToken(match()));
} }
/**
 * Matches an empty literal string (two consecutive single quotes) and pushes
 * the empty string as its token between startList() and endList().
 * NOTE(review): unlike EmptyMultilineLiteralString(), no Comment() rule
 * follows the closing quote here -- confirm that is intended.
 */
Rule EmptyLiteralString() {
return Sequence('\'', '\'', startList(), pushToken(""), endList());
}
/**
 * Matches an empty multiline literal string (an opening ''' immediately
 * followed by a closing '''), pushes the empty string as its token between
 * startList() and endList(), and then applies the Comment() rule.
 */
Rule EmptyMultilineLiteralString() {
return Sequence("'''", "'''", startList(), pushToken(""), endList(), Comment());
}
Rule EmptyArray() { Rule EmptyArray() {
return Sequence('[', ']', startList(), endList()); return Sequence('[', ']', startList(), endList());
} }

View file

@ -64,7 +64,16 @@ public class TomlTest {
assertEquals("Tom \"Dubs\" Preston-Werner", toml.getString("quoted")); assertEquals("Tom \"Dubs\" Preston-Werner", toml.getString("quoted"));
assertEquals("<\\i\\c*\\s*>", toml.getString("regex")); assertEquals("<\\i\\c*\\s*>", toml.getString("regex"));
} }
// Parses the should_get_multiline_literal_string fixture and checks that each
// '''-delimited value equals its double-quoted *_ref counterpart, and that
// the '''''' value is read as an empty string.
@Test
public void should_get_multiline_literal_string() throws Exception {
Toml toml = new Toml().parse(file("should_get_multiline_literal_string"));
assertTrue(toml.getString("empty_line").isEmpty());
assertEquals(toml.getString("regex2_ref"), toml.getString("regex2"));
assertEquals(toml.getString("lines_ref"), toml.getString("lines"));
}
@Test @Test
public void should_get_number() throws Exception { public void should_get_number() throws Exception {
Toml toml = new Toml().parse("b = 1001"); Toml toml = new Toml().parse("b = 1001");
@ -316,6 +325,10 @@ public class TomlTest {
assertEquals(cal.getTime(), toml.getDate("d")); assertEquals(cal.getTime(), toml.getDate("d"));
assertThat(toml.getList("e", String.class), Matchers.contains("a", "b")); assertThat(toml.getList("e", String.class), Matchers.contains("a", "b"));
assertTrue(toml.getBoolean("f")); assertTrue(toml.getBoolean("f"));
assertEquals("abc", toml.getString("g"));
assertEquals("abc", toml.getString("h"));
assertEquals("abc\nabc", toml.getString("i"));
assertEquals("abc\nabc", toml.getString("j"));
} }
@Test @Test
@ -428,7 +441,12 @@ public class TomlTest {
public void should_fail_on_invalid_literal_string() { public void should_fail_on_invalid_literal_string() {
new Toml().parse("a = ' ' jdkf"); new Toml().parse("a = ' ' jdkf");
} }
// Text that follows the closing """ and is not introduced by '#' must make
// parsing fail with an IllegalStateException.
@Test(expected = IllegalStateException.class)
public void should_fail_on_invalid_multiline_string() {
new Toml().parse("a = \"\"\" \"\"\" jdkf");
}
private File file(String file) { private File file(String file) {
return new File(getClass().getResource(file + ".toml").getFile()); return new File(getClass().getResource(file + ".toml").getFile());
} }

View file

@ -3,4 +3,10 @@ b = 1.1 # comment
c = "abc" # comment c = "abc" # comment
d = 2014-08-04T13:47:00Z # comment d = 2014-08-04T13:47:00Z # comment
e = ["a", "b"] # comment e = ["a", "b"] # comment
f = true # comment f = true # comment
g = """abc""" # comment
h = '''abc''' # comment
i = """abc
abc""" # comment
j = '''abc
abc''' # comment

View file

@ -1,5 +1,5 @@
ref = "One\nTwo" ref = "One\nTwo"
one_line = """One\nTwo""" one_line = """One\nTwo""" # comment
many_lines = """ many_lines = """
One One
Two""" Two""" # comment

View file

@ -0,0 +1,12 @@
regex2_ref = "I [dw]on't need \\d{2} apples"
regex2 = '''I [dw]on't need \d{2} apples'''
lines_ref = "The first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n"
lines = '''
The first newline is
trimmed in raw strings.
All other whitespace
is preserved.
'''
empty_line = ''''''