CGX Formatter


The aim of this puzzle is to create a formatter for a new text data format. The first step is to understand what the different element types are, and the rules of formatting. There are two things which are not clear in the statement according to me :

Java

There are certainly many ways to solve this puzzle, so my solution is only an exemple. I never studied parsers or lexers, so my approche is probably quite "naive", and lacks efficiency. But it gives a cool example of inheritance / polymorphism, very important concepts of OOP.

import java.util.*;
import java.io.*;
import java.math.*;

/** This class represents an element, which can be read from CGX content
 * and printed as a well formatted String
 */
abstract class Element {

    /** modifies the representation of an element, indenting it by 'indentLevel' spaces */
    public static String indent(String toIndent, int indentLevel) {
        // Creates the indentation block
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < indentLevel; i++)
            sb.append(" ");
        String indentation = sb.toString();

        // Adds it in front of each line 
        return indentation+toIndent.replace("\n", "\n"+indentation);
    }

    /** returns an int describing the type of the element beginning by content[offset]
     * type 1 : block    type 2 : primitive type    type 3 : key value
     * type 0 : none of the 3, probably a tabulation or a space
     */
    public static int type(String content, int offset) {
        if (isBlock(content, offset))
            return 1;
        else if (isKeyValue(content, offset))
            return 3;
        else if (isPrimitive(content, offset))
            return 2;
        else
            return 0;
    }

    /** reads next element of content and return new offset
     * for each type, should work only if content[offset] is really
     * the beginning of the given type.
     */
    abstract int read(String content, int offset);

    /** returns a representation of the element, following the formatting
     * rules given in the puzzle
     */
    abstract String toFormattedCGX();


    // private functions used to identify each type

    /** returns true if and only if next Element in content is a Block */
    private static boolean isBlock(String content, int offset) {
        return content.charAt(offset) == '(';
    }

    /** returns true if next Element in content is a Primitive type
     * be careful, also returns true if it is a KeyValue, so check with isKeyValue
     * returns false if it is not a Primitive or KeyValue
     */
    private static boolean isPrimitive(String content, int offset) {
        return Character.isDigit(content.charAt(offset)) ||
        content.regionMatches(offset, "true", 0, 4) ||
        content.regionMatches(offset, "false", 0, 5) ||
        content.regionMatches(offset, "null", 0, 4) ||
        content.charAt(offset) == '\'';
    }

    /** returns true if and only if next Element in content is a KeyValue */
    private static boolean isKeyValue(String content, int offset) {
        if (content.charAt(offset) != '\'')
            return false;
        else {
            while (content.charAt(++offset) != '\'');
            ++offset;
            while (offset < content.length() && 
                   (content.charAt(offset) == ' ' || content.charAt(offset) == '\t'))
                ++offset;
            return (offset < content.length() && content.charAt(offset) == '=');
        }
    }
}

class Block extends Element {
    private ArrayList<Element> elements;
    private static final int INDENT_LEVEL = 4;

    public Block() {
        elements = new ArrayList<Element>();
    }

    public int read(String content, int offset) {
        // skips opening parenthesis
        offset++;
        // read each item :
        while (content.charAt(offset) != ')') {
            // skips optionnal ' ', tabulations and ';'
            while (content.charAt(offset) == ' ' || 
                   content.charAt(offset) == '\t' || 
                   content.charAt(offset) == ';')
                offset++;

            // if there is at least one item left
            if (content.charAt(offset) != ')') {
                Element nextElement = null;
                switch (Element.type(content, offset)) {
                    case 1 :
                        nextElement = new Block();
                        break;
                    case 2 :
                        nextElement = new Primitive();
                        break;
                    case 3 :
                        nextElement = new KeyValue();
                        break;
                }
                offset = nextElement.read(content, offset);
                elements.add(nextElement);
            }
        }
        // skip closing parenthesis and return new offset
        return ++offset;
    }

    public String toFormattedCGX() {
        StringBuilder result = new StringBuilder();
        result.append("(\n");
        
        if (elements.size() > 0) {
            for (Element e : elements) {
                result.append(indent(e.toFormattedCGX(), INDENT_LEVEL));
                result.append(";\n");
            }
            // remove last ';' and add closing parenthesis
            result.deleteCharAt(result.length()-2);
        }

        result.append(")");
        return result.toString();
    }
} 

class Primitive extends Element {

    /** string representing the primitive :
     * true, false, null, a number, a string (with quotes)*/
    private String value;

    public Primitive() {}

    public int read(String content, int offset) {
        /* sets i so that content[offset+i] is the 1st char after the primitive
           using the fact that for a string it follows ', for a number it is the  
           is 1st non digital char, and for others it is the first non lower case */ 
        int i = 0;
        
        if (content.charAt(offset) == '\'') {
            ++i;
            while (offset+i < content.length() && content.charAt(offset+i) != '\'')
                ++i;
            ++i;
        } 
        else if (Character.isDigit(content.charAt(offset)))
            while (offset+i < content.length() && Character.isDigit(content.charAt(offset+i)))
                i++;
        else
            while (offset+i < content.length() && Character.isLowerCase(content.charAt(offset+i)))
                i++;

        value = content.substring(offset, offset+i);
        return offset+i;
    }

    public String toFormattedCGX() {
        return value;
    }
}

class KeyValue extends Element {
    // key, stored as a Primitive string
    private Primitive key;
    // value, which can be a block or a Primitive
    private Element value;

    public KeyValue() {
        key = new Primitive();
    }

    public int read(String content, int offset) {
        offset = key.read(content, offset);
        // skips '=', and optionnal spaces
        while (Element.type(content, offset) == 0)
            offset++;
        switch (Element.type(content, offset)) {
            case 1 :
                value = new Block();
                break;
            case 2 :
                value = new Primitive();
                break;
        }
        offset = value.read(content, offset);
        return offset;
    }

    public String toFormattedCGX() {
        StringBuilder result = new StringBuilder();
        result.append(key.toFormattedCGX());
        result.append("=");
        if (value instanceof Block)
            result.append("\n");
        result.append(value.toFormattedCGX());
        return result.toString();
    }
}

class Solution {

    private static String content;
    private static int offset = 0;
    
    public static void main(String args[]) {
        Scanner in = new Scanner(System.in);

        // Reading CGX content
        int N = in.nextInt();
        in.nextLine();
        StringBuilder CGXlines = new StringBuilder(); 
        for (int i = 0; i < N; i++) 
            CGXlines.append(in.nextLine());
        content = CGXlines.toString();
        // For each element, reads it, formats it and prints it
        while (offset < content.length()) {
            Element nextElement = null;
            switch (Element.type(content, offset)) {
                case 1 :
                    nextElement = new Block();
                    break;
                case 2 :
                    nextElement = new Primitive();
                    break;
                case 3 :
                    nextElement = new KeyValue();
                    break;
            }
            if (nextElement == null)
                offset++;
            else {
                offset = nextElement.read(content, offset);
                System.out.println(nextElement.toFormattedCGX());
            }
        }   
    }
}