Initial work to implement regex-based parser

This commit is contained in:
moandji.ezana 2014-08-05 22:18:04 +02:00
parent 14d424e573
commit 055480ee0b
12 changed files with 655 additions and 11 deletions

View file

@ -0,0 +1,107 @@
package com.moandjiezana.toml;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * A node of the TOML document being built: either a single table or an array of tables.
 *
 * Only the two nested implementations can extend this class, because its constructor is private.
 */
abstract class Container {

  private Container() {}

  /**
   * @param key the key about to be written
   * @return true if a value may be stored under the key without clashing with an existing one
   */
  abstract boolean accepts(String key);

  /**
   * Stores a value under the given key.
   */
  abstract void put(String key, Object value);

  /**
   * @return the value stored under the key, or null if there is none
   */
  abstract Object get(String key);

  /**
   * A table: a plain mapping from keys to values or to nested containers.
   */
  static class Table extends Container {

    private final Map<String, Object> values = new HashMap<String, Object>();

    /**
     * A key is accepted when it is unused, or when it currently holds a table array.
     */
    @Override
    boolean accepts(String key) {
      if (!values.containsKey(key)) {
        return true;
      }
      return values.get(key) instanceof TableArray;
    }

    @Override
    void put(String key, Object value) {
      values.put(key, value);
    }

    @Override
    Object get(String key) {
      return values.get(key);
    }

    /**
     * Replaces every nested container with its plain Map or List form, in place.
     *
     * Because the internal map is modified and then handed out, the Table is no longer usable
     * afterwards. Call this only once all data has been gathered.
     *
     * @return a Map-and-List-based representation of the TOML data
     */
    Map<String, Object> consume() {
      for (Map.Entry<String, Object> entry : values.entrySet()) {
        Object current = entry.getValue();
        if (current instanceof Table) {
          Table nested = (Table) current;
          entry.setValue(nested.consume());
        } else if (current instanceof TableArray) {
          TableArray nested = (TableArray) current;
          entry.setValue(nested.getValues());
        }
      }

      return values;
    }

    @Override
    public String toString() {
      return values.toString();
    }
  }

  /**
   * An array of tables. Key operations are delegated to the most recently added table.
   */
  static class TableArray extends Container {

    private final List<Table> values = new ArrayList<Table>();

    /**
     * Starts the array with one empty table, so there is always a current table.
     */
    TableArray() {
      values.add(new Table());
    }

    @Override
    boolean accepts(String key) {
      return getCurrent().accepts(key);
    }

    /**
     * Appends the value as a new current table when it is a Table (the key is ignored in that
     * case); otherwise stores the value under the key in the current table.
     */
    @Override
    void put(String key, Object value) {
      if (!(value instanceof Table)) {
        getCurrent().put(key, value);
        return;
      }

      values.add((Table) value);
    }

    @Override
    Object get(String key) {
      return values.isEmpty() ? null : getCurrent().get(key);
    }

    /**
     * Consumes every table of the array, so the tables are no longer usable afterwards.
     *
     * @return the consumed tables, in insertion order
     */
    List<Map<String, Object>> getValues() {
      ArrayList<Map<String, Object>> unwrapped = new ArrayList<Map<String, Object>>();
      for (int i = 0; i < values.size(); i++) {
        unwrapped.add(values.get(i).consume());
      }

      return unwrapped;
    }

    /**
     * @return the most recently added table
     */
    Table getCurrent() {
      int lastIndex = values.size() - 1;
      return values.get(lastIndex);
    }

    @Override
    public String toString() {
      return values.toString();
    }
  }
}

View file

@ -0,0 +1,146 @@
package com.moandjiezana.toml;
import static com.moandjiezana.toml.ValueAnalysis.INVALID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Line-oriented TOML parser.
 *
 * The input is split into lines; each line is classified as a comment, a table array header,
 * a table header or a key/value pair, and the outcome is recorded in a {@link Results}.
 * Problems are appended to {@code Results.errors} instead of being thrown.
 */
public class RegexParser {

  private static final Pattern TABLE_REGEX = Pattern.compile("\\s?\\[(.*)\\](.*)");
  private static final Pattern TABLE_ARRAY_REGEX = Pattern.compile("\\s?\\[\\[(.*)\\]\\](.*)");
  // An opening bracket with no closing bracket on the same line: the array continues below.
  private static final Pattern MULTILINE_ARRAY_REGEX = Pattern.compile("\\s*\\[([^\\]]*)");
  // A line holding nothing but the closing bracket ends a multiline array.
  private static final Pattern MULTILINE_ARRAY_REGEX_END = Pattern.compile("\\s*\\]");

  /**
   * Ad-hoc check of the multiline array pattern, kept from the original source.
   */
  public static void main(String[] args) {
    System.out.println(MULTILINE_ARRAY_REGEX.matcher(" [ ]").matches());
  }

  private final Results results = new Results();

  /**
   * Parses the given TOML text.
   *
   * @param tomlString the complete TOML document
   * @return the results held by this parser; check its errors before using its values
   */
  public Results run(String tomlString) {
    if (tomlString.isEmpty()) {
      return results;
    }

    String[] lines = tomlString.split("[\\n\\r]");
    StringBuilder multilineBuilder = new StringBuilder();
    boolean multiline = false;
    String key = null;
    String value = null;

    for (String rawLine : lines) {
      String line = rawLine.trim();

      // isComment() is also true for an empty line.
      if (isComment(line)) {
        continue;
      }

      if (multiline) {
        // While an array is open, lines such as "[1, 2]," are elements, never table headers.
        multilineBuilder.append(line);
        if (!MULTILINE_ARRAY_REGEX_END.matcher(line).matches()) {
          continue;
        }
        multiline = false;
        value = multilineBuilder.toString();
        // Empty the buffer completely so the next multiline array starts from scratch.
        multilineBuilder.setLength(0);
      } else {
        if (startContainer(line)) {
          continue;
        }

        // Split on the first '=' only, so that values may themselves contain '='.
        int separator = line.indexOf('=');
        if (separator < 0) {
          results.errors.append("Invalid key/value: " + line + "\n");
          continue;
        }
        key = line.substring(0, separator).trim();
        value = line.substring(separator + 1).trim();

        if (value.isEmpty()) {
          results.errors.append("Invalid key/value: " + key + " = " + value + "\n");
          continue;
        }

        if (MULTILINE_ARRAY_REGEX.matcher(value).matches()) {
          multiline = true;
          multilineBuilder.append(value);
          continue;
        }
      }

      if (!isKeyValid(key)) {
        results.errors.append("Invalid key name: " + key + "\n");
        continue;
      }

      Object convertedValue = new ValueAnalysis(value.trim()).getValue();
      if (convertedValue != INVALID) {
        results.addValue(key, convertedValue);
      } else {
        results.errors.append("Invalid key/value: " + key + " = " + value + "\n");
      }
    }

    if (multiline) {
      // The input ended before the closing bracket; report it rather than dropping the key.
      results.errors.append("Unterminated multiline array: " + key + "\n");
    }

    return results;
  }

  /**
   * Handles a table array header or a table header.
   *
   * @return true if the line was a header and has been fully processed
   */
  private boolean startContainer(String line) {
    // Table arrays are tested first because "[[x]]" also matches the plain table pattern.
    Matcher tableArrayMatcher = TABLE_ARRAY_REGEX.matcher(line);
    if (tableArrayMatcher.matches()) {
      results.startTableArray(tableArrayMatcher.group(1));
      if (!isComment(tableArrayMatcher.group(2))) {
        results.errors.append("Invalid table array definition: " + line + "\n\n");
      }
      return true;
    }

    Matcher tableMatcher = TABLE_REGEX.matcher(line);
    if (tableMatcher.matches()) {
      results.startTables(tableMatcher.group(1));
      if (!isComment(tableMatcher.group(2))) {
        results.errors.append("Invalid table definition: " + line + "\n\n");
      }
      return true;
    }

    return false;
  }

  /**
   * A key is rejected when it contains a dot.
   */
  private boolean isKeyValid(String key) {
    return !key.contains(".");
  }

  /**
   * @return true if the text is null, empty, or its first non-whitespace character is '#';
   *         false for whitespace-only text
   */
  private boolean isComment(String line) {
    if (line == null || line.isEmpty()) {
      return true;
    }

    for (char c : line.toCharArray()) {
      if (!Character.isWhitespace(c)) {
        return c == '#';
      }
    }

    return false;
  }
}

View file

@ -0,0 +1,110 @@
package com.moandjiezana.toml;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Accumulates the outcome of a parse: the table tree built so far and any error messages.
 *
 * The bottom of the internal stack is the root table; the top is the container that currently
 * receives key/value pairs.
 */
class Results {

  public Set<String> tables = new HashSet<String>();
  public StringBuilder errors = new StringBuilder();
  private Deque<Container> stack = new ArrayDeque<Container>();

  public Results() {
    stack.push(new Container.Table());
  }

  /**
   * Stores the value in the container on top of the stack, or records an error if that
   * container does not accept the key.
   */
  public void addValue(String key, Object value) {
    Container target = stack.peek();

    if (!target.accepts(key)) {
      errors.append("Key " + key + " is defined twice!\n");
      return;
    }

    target.put(key, value);
  }

  /**
   * Makes a table array element the current container, walking the dotted name from the root.
   *
   * When the last segment already names a table array, a new table is appended to it; when it
   * is unused, a new table array is created. Missing intermediate segments become tables.
   */
  public void startTableArray(String tableName) {
    rewindToRoot();

    String[] segments = tableName.split("\\.");
    int lastIndex = segments.length - 1;

    for (int i = 0; i <= lastIndex; i++) {
      String segment = segments[i];
      Container parent = stack.peek();
      Object existing = parent.get(segment);
      boolean lastSegment = i == lastIndex;

      if (existing instanceof Container.TableArray) {
        Container.TableArray tableArray = (Container.TableArray) existing;
        stack.push(tableArray);
        if (lastSegment) {
          tableArray.put(segment, new Container.Table());
        }
        stack.push(tableArray.getCurrent());
        continue;
      }

      if (existing instanceof Container.Table) {
        stack.push((Container) existing);
        continue;
      }

      if (!parent.accepts(segment)) {
        errors.append("Duplicate key and table definitions for " + tableName + "!\n");
        return;
      }

      Container created;
      if (lastSegment) {
        created = new Container.TableArray();
      } else {
        created = new Container.Table();
      }
      addValue(segment, created);
      stack.push(created);
      if (created instanceof Container.TableArray) {
        stack.push(((Container.TableArray) created).getCurrent());
      }
    }
  }

  /**
   * Makes the named table the current container, walking the dotted name from the root and
   * creating any missing tables. A name seen before is recorded as an error.
   */
  public void startTables(String tableName) {
    boolean firstDefinition = tables.add(tableName);
    if (!firstDefinition) {
      errors.append("Table " + tableName + " defined twice!\n");
    }

    rewindToRoot();

    for (String segment : tableName.split("\\.")) {
      Container parent = stack.peek();
      Object existing = parent.get(segment);

      if (existing instanceof Container) {
        stack.push((Container) existing);
        // For a table array, descend into its most recent element.
        if (stack.peek() instanceof Container.TableArray) {
          stack.push(((Container.TableArray) stack.peek()).getCurrent());
        }
      } else if (parent.accepts(segment)) {
        startTable(segment);
      } else {
        errors.append("Duplicate key and table definitions for " + tableName + "!\n");
        break;
      }
    }
  }

  /**
   * Warning: After this method has been called, this instance is no longer usable.
   *
   * @return the root table converted to plain Maps and Lists
   */
  public Map<String, Object> consume() {
    // The root table was pushed first, so it sits at the far end of the deque.
    Container.Table root = (Container.Table) stack.getLast();
    stack.clear();

    return root.consume();
  }

  /**
   * Adds a new table under the given name to the current container and makes it current.
   */
  private Container startTable(String tableName) {
    Container created = new Container.Table();
    addValue(tableName, created);
    stack.push(created);

    return created;
  }

  /**
   * Pops containers until only the root table remains on the stack.
   */
  private void rewindToRoot() {
    while (stack.size() > 1) {
      stack.pop();
    }
  }
}

View file

@ -135,12 +135,13 @@ public class Toml {
// ParsingResult<Object> parsingResult = new ReportingParseRunner<Object>(parser.Toml()).run(tomlString);
// System.out.println(ParseTreeUtils.printNodeTree(parsingResult));
TomlParser.Results results = (TomlParser.Results) result.valueStack.peek(result.valueStack.size() - 1);
// TomlParser.Results results = (TomlParser.Results) result.valueStack.peek(result.valueStack.size() - 1);
Results results = new RegexParser().run(tomlString);
if (results.errors.length() > 0) {
throw new IllegalStateException(results.errors.toString());
}
this.values = results.values;
this.values = results.consume();
return this;
}

View file

@ -0,0 +1,229 @@
package com.moandjiezana.toml;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Converts the raw text of a single TOML value into a Java object.
 *
 * Supported values are basic strings, integers, floats, booleans, arrays (possibly nested) and
 * UTC datetimes. A value may be followed by whitespace and a '#' comment; anything else after
 * the value makes it invalid.
 */
class ValueAnalysis {

  /** Returned by {@link #getValue()} when the raw text is not a valid value. */
  static final Object INVALID = new Object();

  private static final Pattern BOOLEAN_REGEX = Pattern.compile("(true|false)(.*)");
  // At least one digit is required, so "", "-" and "." never reach Double.valueOf().
  private static final Pattern FLOAT_REGEX = Pattern.compile("(-?(?:[0-9]+\\.[0-9]*|\\.[0-9]+))(.*)");
  private static final Pattern INTEGER_REGEX = Pattern.compile("(-?[0-9]+)(.*)");
  private static final Pattern DATE_REGEX = Pattern.compile("(\\d{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]Z)(.*)");

  private final String rawValue;

  /**
   * @param value the text of the value, without surrounding whitespace
   */
  public ValueAnalysis(String value) {
    this.rawValue = value;
  }

  /**
   * @return a String, Long, Double, Boolean, java.util.Date or List of those, or
   *         {@link #INVALID} if the text is not a valid value
   */
  public Object getValue() {
    return convert(rawValue);
  }

  private Object convert(String value) {
    if (value == null || value.isEmpty()) {
      return INVALID;
    }
    if (value.charAt(0) == '"') {
      return convertQuotedString(value);
    }
    if (value.charAt(0) == '[') {
      return convertList(value);
    }

    String literal = literalBeforeComment(INTEGER_REGEX, value);
    if (literal != null) {
      try {
        return Long.valueOf(literal);
      } catch (NumberFormatException e) {
        // Only digits, but too large or too small for a 64-bit integer.
        return INVALID;
      }
    }

    literal = literalBeforeComment(FLOAT_REGEX, value);
    if (literal != null) {
      return Double.valueOf(literal);
    }

    literal = literalBeforeComment(BOOLEAN_REGEX, value);
    if (literal != null) {
      return Boolean.valueOf(literal);
    }

    literal = literalBeforeComment(DATE_REGEX, value);
    if (literal != null) {
      return convertDate(literal);
    }

    return INVALID;
  }

  /**
   * @return the first group of the pattern if the whole value matches and the second group is
   *         empty or a comment; otherwise null
   */
  private static String literalBeforeComment(Pattern pattern, String value) {
    Matcher matcher = pattern.matcher(value);
    if (matcher.matches() && isComment(matcher.group(2))) {
      return matcher.group(1);
    }
    return null;
  }

  /**
   * Parses a value that starts with a double quote.
   */
  private Object convertQuotedString(String value) {
    int closingQuote = -1;
    for (int i = 1; i < value.length(); i++) {
      char c = value.charAt(i);
      if (c == '\\') {
        i++; // the escaped character can never close the string
      } else if (c == '"') {
        closingQuote = i;
        break;
      }
    }

    if (closingQuote < 0 || !isComment(value.substring(closingQuote + 1))) {
      return INVALID;
    }

    return convertString(value.substring(1, closingQuote));
  }

  /**
   * Parses a value that starts with an opening bracket. All elements must be of the same type.
   */
  private Object convertList(String value) {
    int closingBracket = findClosingBracket(value);
    if (closingBracket < 0 || !isComment(value.substring(closingBracket + 1))) {
      return INVALID;
    }

    ArrayList<Object> values = new ArrayList<Object>();
    for (String token : tokenizeList(value.substring(1, closingBracket))) {
      Object converted = convert(token);
      if (converted == INVALID) {
        // INVALID is a plain Object, which the type comparison below would let through.
        return INVALID;
      }
      if (!values.isEmpty() && !haveCompatibleTypes(values.get(0), converted)) {
        return INVALID;
      }
      values.add(converted);
    }

    return values;
  }

  private static boolean haveCompatibleTypes(Object first, Object other) {
    return first.getClass().isAssignableFrom(other.getClass())
        || other.getClass().isAssignableFrom(first.getClass());
  }

  /**
   * @return the index of the bracket that closes the one at index 0, ignoring brackets inside
   *         strings, or -1 if there is none
   */
  private static int findClosingBracket(String value) {
    int depth = 0;
    boolean inString = false;

    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      if (inString) {
        if (c == '\\') {
          i++;
        } else if (c == '"') {
          inString = false;
        }
      } else if (c == '"') {
        inString = true;
      } else if (c == '[') {
        depth++;
      } else if (c == ']' && --depth == 0) {
        return i;
      }
    }

    return -1;
  }

  /**
   * Splits the text between the outer brackets of an array on its top-level commas; commas
   * inside strings or nested arrays do not split.
   *
   * @return the trimmed elements; a trailing comma or a blank list adds no empty element
   */
  private List<String> tokenizeList(String list) {
    ArrayList<String> tokens = new ArrayList<String>();
    StringBuilder token = new StringBuilder();
    int depth = 0;
    boolean inString = false;

    for (int i = 0; i < list.length(); i++) {
      char c = list.charAt(i);
      if (inString) {
        token.append(c);
        if (c == '\\' && i + 1 < list.length()) {
          token.append(list.charAt(++i));
        } else if (c == '"') {
          inString = false;
        }
      } else if (c == ',' && depth == 0) {
        tokens.add(token.toString().trim());
        token.setLength(0);
      } else {
        if (c == '"') {
          inString = true;
        } else if (c == '[') {
          depth++;
        } else if (c == ']') {
          depth--;
        }
        token.append(c);
      }
    }

    String last = token.toString().trim();
    if (!last.isEmpty()) {
      tokens.add(last);
    }

    return tokens;
  }

  /**
   * @param date text already validated against the datetime pattern, ending in 'Z'
   */
  private Object convertDate(String date) {
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
    // Non-lenient parsing rejects out-of-range fields such as month 13.
    dateFormat.setLenient(false);
    try {
      return dateFormat.parse(date.replace("Z", "+0000"));
    } catch (java.text.ParseException e) {
      return INVALID;
    }
  }

  /**
   * @return true if the text is null, empty, or its first non-whitespace character is '#';
   *         false for whitespace-only text
   */
  private static boolean isComment(String line) {
    if (line == null || line.isEmpty()) {
      return true;
    }

    for (char c : line.toCharArray()) {
      if (!Character.isWhitespace(c)) {
        return c == '#';
      }
    }

    return false;
  }

  /**
   * Resolves the escape sequences of a string body in a single left-to-right pass, so an
   * escaped backslash is never re-read as the start of another escape.
   *
   * @param value the text between the quotes
   * @return the unescaped text, or {@link #INVALID} on an unknown or incomplete escape
   */
  private Object convertString(String value) {
    StringBuilder unescaped = new StringBuilder(value.length());

    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      if (c != '\\') {
        unescaped.append(c);
        continue;
      }

      if (++i >= value.length()) {
        return INVALID;
      }

      switch (value.charAt(i)) {
        case 'b':
          unescaped.append('\b');
          break;
        case 't':
          unescaped.append('\t');
          break;
        case 'n':
          unescaped.append('\n');
          break;
        case 'f':
          unescaped.append('\f');
          break;
        case 'r':
          unescaped.append('\r');
          break;
        case '"':
          unescaped.append('"');
          break;
        case '/':
          unescaped.append('/');
          break;
        case '\\':
          unescaped.append('\\');
          break;
        case 'u':
          // A unicode escape is followed by exactly four hexadecimal digits.
          if (i + 4 >= value.length()) {
            return INVALID;
          }
          int codeUnit = 0;
          for (int j = i + 1; j <= i + 4; j++) {
            int digit = hexDigit(value.charAt(j));
            if (digit < 0) {
              return INVALID;
            }
            codeUnit = codeUnit * 16 + digit;
          }
          unescaped.append((char) codeUnit);
          i += 4;
          break;
        default:
          return INVALID;
      }
    }

    return unescaped.toString();
  }

  /**
   * @return the value of an ASCII hexadecimal digit, or -1 for any other character
   */
  private static int hexDigit(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }
}

View file

@ -16,7 +16,7 @@ public class TomlDefaultsTest {
@Before
public void before() {
defaultToml = new Toml().parse("a = \"a\"\n [group]\n a=\"a\"\n [[array]]\n b=1 [[array]]\n b=2");
defaultToml = new Toml().parse("a = \"a\"\n [group]\n a=\"a\"\n [[array]]\n b=1\n [[array]]\n b=2");
}
@Test
@ -65,7 +65,7 @@ public class TomlDefaultsTest {
@Test
public void should_perform_shallow_merge() throws Exception {
Toml toml = new Toml(defaultToml).parse("[group]\nb=1\n [[array]]\n b=0");
Toml toml2 = new Toml(defaultToml).parse("[[array]]\n b=1 [[array]]\n b=2 [[array]]\n b=3");
Toml toml2 = new Toml(defaultToml).parse("[[array]]\n b=1\n [[array]]\n b=2\n [[array]]\n b=3");
assertEquals(1, toml.getTable("group").getLong("b").intValue());
assertNull(toml.getTable("group").getString("a"));

View file

@ -4,6 +4,7 @@ import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import java.io.File;
@ -13,7 +14,7 @@ import java.util.Map;
import java.util.TimeZone;
import org.fest.reflect.core.Reflection;
import org.junit.Ignore;
import org.hamcrest.Matchers;
import org.junit.Test;
public class TomlTest {
@ -52,12 +53,26 @@ public class TomlTest {
}
@Test
public void should_get_list() throws Exception {
public void should_get_array() throws Exception {
Toml toml = new Toml().parse("list = [\"a\", \"b\", \"c\"]");
assertEquals(asList("a", "b", "c"), toml.getList("list", String.class));
}
@Test
public void should_allow_multiline_array() throws Exception {
Toml toml = new Toml().parse(file("should_allow_multiline_array"));
assertEquals(asList("a", "b", "c"), toml.getList("a", String.class));
}
@Test
public void should_get_nested_arrays() throws Exception {
Toml clients = new Toml().parse("data = [ [\"gamma\", \"delta\"], [1, 2] ] # just an update to make sure parsers support it");
assertEquals(asList(asList("gamma", "delta"), asList(1L, 2L)), clients.getList("data", String.class));
}
@Test
public void should_get_boolean() throws Exception {
Toml toml = new Toml().parse("bool_false = false\nbool_true = true");
@ -229,6 +244,22 @@ public class TomlTest {
assertEquals(1, toml.getLong("group.key").intValue());
}
@Test
public void should_allow_comment_after_values() throws Exception {
Toml toml = new Toml().parse(new File(getClass().getResource("should_allow_comment_after_values.toml").getFile()));
assertEquals(1, toml.getLong("a").intValue());
assertEquals(1.1, toml.getDouble("b").doubleValue(), 0);
assertEquals("abc", toml.getString("c"));
Calendar cal = Calendar.getInstance();
cal.set(2014, Calendar.AUGUST, 4, 13, 47, 0);
cal.set(Calendar.MILLISECOND, 0);
cal.setTimeZone(TimeZone.getTimeZone("UTC"));
assertEquals(cal.getTime(), toml.getDate("d"));
assertThat(toml.getList("e", String.class), Matchers.contains("a", "b"));
assertTrue(toml.getBoolean("f"));
}
@Test
public void should_support_special_characters_in_strings() {
Toml toml = new Toml().parse(new File(getClass().getResource("should_support_special_characters_in_strings.toml").getFile()));
@ -238,9 +269,9 @@ public class TomlTest {
@Test
public void should_support_unicode_characters_in_strings() throws Exception {
Toml toml = new Toml().parse("key=\"\\u00B1\"\n");
Toml toml = new Toml().parse(new File(getClass().getResource("should_support_special_characters_in_strings.toml").getFile()));
assertEquals("±", toml.getString("key"));
assertEquals("more or less ±", toml.getString("unicode_key"));
}
@Test(expected = IllegalStateException.class)
@ -285,15 +316,17 @@ public class TomlTest {
new Toml().parse("a = 200-");
}
@Ignore
@Test(expected = IllegalStateException.class)
public void should_fail_when_illegal_characters_after_table() throws Exception {
new Toml().parse("[error] if you didn't catch this, your parser is broken");
}
@Ignore
@Test(expected = IllegalStateException.class)
public void should_fail_when_illegal_characters_after_key() throws Exception {
new Toml().parse("number = 3.14 pi");
}
private File file(String file) {
return new File(getClass().getResource(file + ".toml").getFile());
}
}

View file

@ -0,0 +1,5 @@
{
"~!@$^&*()_+-`1234567890[]\\|/?><.,;:'": {
"type": "integer", "value": "1"
}
}

View file

@ -0,0 +1 @@
~!@$^&*()_+-`1234567890[]\|/?><.,;:' = 1

View file

@ -0,0 +1,6 @@
a = 1 # comment
b = 1.1 # comment
c = "abc" # comment
d = 2014-08-04T13:47:00Z # comment
e = ["a", "b"] # comment
f = true # comment

View file

@ -0,0 +1,5 @@
a = [
"a",
"b",
"c"
]

View file

@ -1 +1,2 @@
key = "\" \t \n \r \\ \/ \b \f"
key = "\" \t \n \r \\ \/ \b \f"
unicode_key = "more or less \u00B1"