001    package biweekly.io.text;
002    
003    import static biweekly.util.StringUtils.NEWLINE;
004    
005    import java.io.Closeable;
006    import java.io.IOException;
007    import java.io.Reader;
008    
009    import biweekly.ICalException;
010    import biweekly.parameter.ICalParameters;
011    
012    /*
013     Copyright (c) 2013, Michael Angstadt
014     All rights reserved.
015    
016     Redistribution and use in source and binary forms, with or without
017     modification, are permitted provided that the following conditions are met: 
018    
019     1. Redistributions of source code must retain the above copyright notice, this
020     list of conditions and the following disclaimer. 
021     2. Redistributions in binary form must reproduce the above copyright notice,
022     this list of conditions and the following disclaimer in the documentation
023     and/or other materials provided with the distribution. 
024    
025     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
026     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
027     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
028     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
029     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
030     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
031     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
032     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
033     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
034     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
035     */
036    
037    /**
038     * Parses an iCalendar data stream.
039     * @author Michael Angstadt
040     * @rfc 5545
041     */
042    public class ICalRawReader implements Closeable {
043            private final FoldedLineReader reader;
044            private boolean caretDecodingEnabled = true;
045            private boolean eof = false;
046    
047            /**
048             * Creates a new reader.
049             * @param reader the reader to the data stream
050             */
051            public ICalRawReader(Reader reader) {
052                    this.reader = new FoldedLineReader(reader);
053            }
054    
055            /**
056             * Gets the line number of the last line that was read.
057             * @return the line number
058             */
059            public int getLineNum() {
060                    return reader.getLineNum();
061            }
062    
063            /**
064             * Starts or continues reading from the iCalendar data stream.
065             * @param listener handles the iCalendar data as it is read off the wire
066             * @throws IOException if there is an I/O problem
067             */
068            public void start(ICalDataStreamListener listener) throws IOException {
069                    String line;
070                    while ((line = reader.readLine()) != null) {
071                            try {
072                                    parseLine(line, listener);
073                            } catch (StopReadingException e) {
074                                    return;
075                            }
076                    }
077                    eof = true;
078            }
079    
080            private void parseLine(String line, ICalDataStreamListener listener) {
081                    String propertyName = null;
082                    ICalParameters parameters = new ICalParameters();
083                    String value = null;
084    
085                    char escapeChar = 0; //is the next char escaped?
086                    boolean inQuotes = false; //are we inside of double quotes?
087                    StringBuilder buffer = new StringBuilder();
088                    String curParamName = null;
089                    for (int i = 0; i < line.length(); i++) {
090                            char ch = line.charAt(i);
091                            if (escapeChar != 0) {
092                                    if (escapeChar == '\\') {
093                                            //backslash escaping in parameter values is not part of the standard
094                                            if (ch == '\\') {
095                                                    buffer.append(ch);
096                                            } else if (ch == 'n' || ch == 'N') {
097                                                    //newlines
098                                                    buffer.append(NEWLINE);
099                                            } else if (ch == '"') {
100                                                    //incase a double quote is escaped with a backslash
101                                                    buffer.append(ch);
102                                            } else {
103                                                    //treat the escape character as a normal character because it's not a valid escape sequence
104                                                    buffer.append(escapeChar).append(ch);
105                                            }
106                                    } else if (escapeChar == '^') {
107                                            if (ch == '^') {
108                                                    buffer.append(ch);
109                                            } else if (ch == 'n') {
110                                                    buffer.append(NEWLINE);
111                                            } else if (ch == '\'') {
112                                                    buffer.append('"');
113                                            } else {
114                                                    //treat the escape character as a normal character because it's not a valid escape sequence
115                                                    buffer.append(escapeChar).append(ch);
116                                            }
117                                    }
118                                    escapeChar = 0;
119                            } else if (ch == '\\' || (ch == '^' && caretDecodingEnabled)) {
120                                    escapeChar = ch;
121                            } else if ((ch == ';' || ch == ':') && !inQuotes) {
122                                    if (propertyName == null) {
123                                            propertyName = buffer.toString();
124                                    } else if (curParamName == null) {
125                                            //value-less parameter (bad iCal syntax)
126                                            String parameterName = buffer.toString();
127                                            listener.valuelessParameter(propertyName, parameterName);
128                                            parameters.put(parameterName, null);
129                                    } else {
130                                            //parameter value
131                                            String paramValue = buffer.toString();
132                                            parameters.put(curParamName, paramValue);
133                                            curParamName = null;
134                                    }
135                                    buffer.setLength(0);
136    
137                                    if (ch == ':') {
138                                            if (i < line.length() - 1) {
139                                                    value = line.substring(i + 1);
140                                            } else {
141                                                    value = "";
142                                            }
143                                            break;
144                                    }
145                            } else if (ch == ',' && !inQuotes) {
146                                    //multi-valued parameter
147                                    parameters.put(curParamName, buffer.toString());
148                                    buffer.setLength(0);
149                            } else if (ch == '=' && curParamName == null) {
150                                    //parameter name
151                                    curParamName = buffer.toString();
152                                    buffer.setLength(0);
153                            } else if (ch == '"') {
154                                    inQuotes = !inQuotes;
155                            } else {
156                                    buffer.append(ch);
157                            }
158                    }
159    
160                    if (propertyName == null || value == null) {
161                            listener.invalidLine(line);
162                            return;
163                    }
164                    if ("BEGIN".equalsIgnoreCase(propertyName)) {
165                            listener.beginComponent(value);
166                            return;
167                    }
168                    if ("END".equalsIgnoreCase(propertyName)) {
169                            listener.endComponent(value);
170                            return;
171                    }
172                    listener.readProperty(propertyName, parameters, value);
173            }
174    
175            /**
176             * <p>
177             * Gets whether the reader will decode parameter values that use circumflex
178             * accent encoding (enabled by default). This escaping mechanism allows
179             * newlines and double quotes to be included in parameter values.
180             * </p>
181             * 
182             * <table border="1">
183             * <tr>
184             * <th>Raw Character</th>
185             * <th>Encoded Character</th>
186             * </tr>
187             * <tr>
188             * <td>{@code "}</td>
189             * <td>{@code ^'}</td>
190             * </tr>
191             * <tr>
192             * <td><i>newline</i></td>
193             * <td>{@code ^n}</td>
194             * </tr>
195             * <tr>
196             * <td>{@code ^}</td>
197             * <td>{@code ^^}</td>
198             * </tr>
199             * </table>
200             * 
201             * <p>
202             * Example:
203             * </p>
204             * 
205             * <pre>
206             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
207             *  sburgh, PA 15212":40.446816;80.00566
208             * </pre>
209             * 
210             * @return true if circumflex accent decoding is enabled, false if not
211             * @rfc 6868
212             */
213            public boolean isCaretDecodingEnabled() {
214                    return caretDecodingEnabled;
215            }
216    
217            /**
218             * <p>
219             * Sets whether the reader will decode parameter values that use circumflex
220             * accent encoding (enabled by default). This escaping mechanism allows
221             * newlines and double quotes to be included in parameter values.
222             * </p>
223             * 
224             * <table border="1">
225             * <tr>
226             * <th>Raw Character</th>
227             * <th>Encoded Character</th>
228             * </tr>
229             * <tr>
230             * <td>{@code "}</td>
231             * <td>{@code ^'}</td>
232             * </tr>
233             * <tr>
234             * <td><i>newline</i></td>
235             * <td>{@code ^n}</td>
236             * </tr>
237             * <tr>
238             * <td>{@code ^}</td>
239             * <td>{@code ^^}</td>
240             * </tr>
241             * </table>
242             * 
243             * <p>
244             * Example:
245             * </p>
246             * 
247             * <pre>
248             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
249             *  sburgh, PA 15212":geo:40.446816,-80.00566
250             * </pre>
251             * 
252             * @param enable true to use circumflex accent decoding, false not to
253             * @rfc 6868
254             */
255            public void setCaretDecodingEnabled(boolean enable) {
256                    caretDecodingEnabled = enable;
257            }
258    
259            /**
260             * Determines whether the end of the data stream has been reached.
261             * @return true if the end has been reached, false if not
262             */
263            public boolean eof() {
264                    return eof;
265            }
266    
267            /**
268             * Handles the iCalendar data as it is read off the data stream. Each one of
269             * this interface's methods may throw a {@link StopReadingException} at any
270             * time to force the parser to stop reading from the data stream. This will
271             * cause the reader to return from the {@link ICalRawReader#start} method.
272             * To continue reading from the data stream, simply call the
273             * {@link ICalRawReader#start} method again.
274             * @author Michael Angstadt
275             */
276            public static interface ICalDataStreamListener {
277                    /**
278                     * Called when a component begins (when a "BEGIN:NAME" property is
279                     * reached).
280                     * @param name the component name (e.g. "VEVENT")
281                     * @throws StopReadingException to force the reader to stop reading from
282                     * the data stream
283                     */
284                    void beginComponent(String name);
285    
286                    /**
287                     * Called when a property is read.
288                     * @param name the property name (e.g. "VERSION")
289                     * @param parameters the parameters
290                     * @param value the property value
291                     * @throws StopReadingException to force the reader to stop reading from
292                     * the data stream
293                     */
294                    void readProperty(String name, ICalParameters parameters, String value);
295    
296                    /**
297                     * Called when a component ends (when a "END:NAME" property is reached).
298                     * @param name the component name (e.g. "VEVENT")
299                     * @throws StopReadingException to force the reader to stop reading from
300                     * the data stream
301                     */
302                    void endComponent(String name);
303    
304                    /**
305                     * Called when a line cannot be parsed.
306                     * @param line the unparseable line
307                     * @throws StopReadingException to force the reader to stop reading from
308                     * the data stream
309                     */
310                    void invalidLine(String line);
311    
312                    /**
313                     * Called when a value-less parameter is read.
314                     * @param propertyName the property name (e.g. "VERSION")
315                     * @param parameterName the parameter name (e.g. "FMTTYPE")
316                     */
317                    void valuelessParameter(String propertyName, String parameterName);
318            }
319    
320            /**
321             * Instructs an {@link ICalRawReader} to stop reading from the data stream
322             * when thrown from an {@link ICalDataStreamListener} implementation.
323             * @author Michael Angstadt
324             */
325            @SuppressWarnings("serial")
326            public static class StopReadingException extends ICalException {
327                    //empty
328            }
329    
330            /**
331             * Closes the underlying {@link Reader} object.
332             */
333            public void close() throws IOException {
334                    reader.close();
335            }
336    }