001package biweekly.io.text;
002
import static biweekly.util.StringUtils.NEWLINE;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import biweekly.ICalVersion;
import biweekly.parameter.ICalParameters;
import biweekly.util.StringUtils;
015
016/*
017 Copyright (c) 2013-2015, Michael Angstadt
018 All rights reserved.
019
020 Redistribution and use in source and binary forms, with or without
021 modification, are permitted provided that the following conditions are met: 
022
023 1. Redistributions of source code must retain the above copyright notice, this
024 list of conditions and the following disclaimer. 
025 2. Redistributions in binary form must reproduce the above copyright notice,
026 this list of conditions and the following disclaimer in the documentation
027 and/or other materials provided with the distribution. 
028
029 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
030 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
031 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
032 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
033 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
034 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
035 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
036 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
037 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
038 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
039 */
040
041/**
042 * Parses the components out of each line in a plain-text iCalendar data stream.
043 * @author Michael Angstadt
044 * @see <a href="http://www.imc.org/pdi/pdiproddev.html">1.0 specs</a>
045 * @see <a href="https://tools.ietf.org/html/rfc2445">RFC 2445</a>
046 * @see <a href="http://tools.ietf.org/html/rfc5545">RFC 5545</a>
047 */
048public class ICalRawReader implements Closeable {
049        private final FoldedLineReader reader;
050        private final List<String> components = new ArrayList<String>();
051        private boolean caretDecodingEnabled = true;
052        private ICalVersion version = null;
053
054        /**
055         * @param reader the reader to wrap
056         */
057        public ICalRawReader(Reader reader) {
058                this.reader = new FoldedLineReader(reader);
059        }
060
061        /**
062         * Gets the line number of the last line that was read.
063         * @return the line number
064         */
065        public int getLineNum() {
066                return reader.getLineNum();
067        }
068
069        /**
070         * Gets the iCalendar version that the reader is currently parsing with.
071         * @return the iCalendar version or null if unknown
072         */
073        public ICalVersion getVersion() {
074                return version;
075        }
076
077        /**
078         * Parses the next line of the iCalendar file.
079         * @return the next line or null if there are no more lines
080         * @throws ICalParseException if a line cannot be parsed
081         * @throws IOException if there's a problem reading from the input stream
082         */
083        public ICalRawLine readLine() throws IOException {
084                String line = reader.readLine();
085                if (line == null) {
086                        return null;
087                }
088
089                String propertyName = null;
090                ICalParameters parameters = new ICalParameters();
091                String value = null;
092
093                char escapeChar = 0; //is the next char escaped?
094                boolean inQuotes = false; //are we inside of double quotes?
095                StringBuilder buffer = new StringBuilder();
096                String curParamName = null;
097                for (int i = 0; i < line.length(); i++) {
098                        char ch = line.charAt(i);
099
100                        if (escapeChar != 0) {
101                                //this character was escaped
102                                if (escapeChar == '\\') {
103                                        //backslash escaping in parameter values is not part of the standard
104                                        if (ch == '\\') {
105                                                buffer.append(ch);
106                                        } else if (ch == 'n' || ch == 'N') {
107                                                //newlines
108                                                buffer.append(NEWLINE);
109                                        } else if (ch == '"' && version != ICalVersion.V1_0) {
110                                                //incase a double quote is escaped with a backslash
111                                                buffer.append(ch);
112                                        } else if (ch == ';' && version == ICalVersion.V1_0) {
113                                                //semi-colons can only be escaped in 1.- parameter values
114                                                //if a 2.0 param value has semi-colons, the value should be surrounded in double quotes
115                                                buffer.append(ch);
116                                        } else {
117                                                //treat the escape character as a normal character because it's not a valid escape sequence
118                                                buffer.append(escapeChar).append(ch);
119                                        }
120                                } else if (escapeChar == '^') {
121                                        if (ch == '^') {
122                                                buffer.append(ch);
123                                        } else if (ch == 'n') {
124                                                buffer.append(NEWLINE);
125                                        } else if (ch == '\'') {
126                                                buffer.append('"');
127                                        } else {
128                                                //treat the escape character as a normal character because it's not a valid escape sequence
129                                                buffer.append(escapeChar).append(ch);
130                                        }
131                                }
132                                escapeChar = 0;
133                                continue;
134                        }
135
136                        if (ch == '\\' || (ch == '^' && version != ICalVersion.V1_0 && caretDecodingEnabled)) {
137                                //an escape character was read
138                                escapeChar = ch;
139                                continue;
140                        }
141
142                        if ((ch == ';' || ch == ':') && !inQuotes) {
143                                if (propertyName == null) {
144                                        //property name
145                                        propertyName = buffer.toString();
146                                } else {
147                                        //parameter value
148                                        String paramValue = buffer.toString();
149                                        if (version == ICalVersion.V1_0) {
150                                                //1.0 allows whitespace to surround the "=", so remove it
151                                                paramValue = StringUtils.ltrim(paramValue);
152                                        }
153                                        parameters.put(curParamName, paramValue);
154                                        curParamName = null;
155                                }
156                                buffer.setLength(0);
157
158                                if (ch == ':') {
159                                        //the rest of the line is the property value
160                                        if (i < line.length() - 1) {
161                                                value = line.substring(i + 1);
162                                        } else {
163                                                value = "";
164                                        }
165                                        break;
166                                }
167                                continue;
168                        }
169
170                        if (ch == ',' && !inQuotes && version != ICalVersion.V1_0) {
171                                //multi-valued parameter
172                                parameters.put(curParamName, buffer.toString());
173                                buffer.setLength(0);
174                                continue;
175                        }
176
177                        if (ch == '=' && curParamName == null) {
178                                //parameter name
179                                curParamName = buffer.toString();
180                                if (version == ICalVersion.V1_0) {
181                                        //2.1 allows whitespace to surround the "=", so remove it
182                                        curParamName = StringUtils.rtrim(curParamName);
183                                }
184                                buffer.setLength(0);
185                                continue;
186                        }
187
188                        if (ch == '"' && version != ICalVersion.V1_0) {
189                                //1.0 doesn't use the quoting mechanism
190                                inQuotes = !inQuotes;
191                                continue;
192                        }
193
194                        buffer.append(ch);
195                }
196
197                if (propertyName == null || value == null) {
198                        throw new ICalParseException(line);
199                }
200
201                if ("BEGIN".equalsIgnoreCase(propertyName)) {
202                        components.add(value.toUpperCase());
203                } else if ("END".equalsIgnoreCase(propertyName)) {
204                        int index = components.lastIndexOf(value.toUpperCase());
205                        if (index >= 0) {
206                                components.subList(index, components.size()).clear();
207                        }
208                } else if ("VERSION".equalsIgnoreCase(propertyName) && isUnderVCalendar()) {
209                        //only look at VERSION properties that are directly under the VCALENDAR component
210                        ICalVersion version = ICalVersion.get(value);
211                        if (version != null) {
212                                //if the value is a valid version, then skip this property and parse the next
213                                this.version = version;
214                                return readLine();
215                        }
216                }
217
218                return new ICalRawLine(propertyName, parameters, value);
219        }
220
221        private boolean isUnderVCalendar() {
222                int firstIndex = components.indexOf("VCALENDAR");
223                if (firstIndex < 0) {
224                        return false;
225                }
226
227                int lastIndex = components.lastIndexOf("VCALENDAR");
228                return firstIndex == lastIndex && firstIndex == components.size() - 1;
229        }
230
231        /**
232         * <p>
233         * Gets whether the reader will decode parameter values that use circumflex
234         * accent encoding (enabled by default). This escaping mechanism allows
235         * newlines and double quotes to be included in parameter values.
236         * </p>
237         * 
238         * <table border="1">
239         * <tr>
240         * <th>Raw Character</th>
241         * <th>Encoded Character</th>
242         * </tr>
243         * <tr>
244         * <td>{@code "}</td>
245         * <td>{@code ^'}</td>
246         * </tr>
247         * <tr>
248         * <td><i>newline</i></td>
249         * <td>{@code ^n}</td>
250         * </tr>
251         * <tr>
252         * <td>{@code ^}</td>
253         * <td>{@code ^^}</td>
254         * </tr>
255         * </table>
256         * 
257         * <p>
258         * Example:
259         * </p>
260         * 
261         * <pre>
262         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
263         *  sburgh, PA 15212":40.446816;80.00566
264         * </pre>
265         * 
266         * @return true if circumflex accent decoding is enabled, false if not
267         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
268         */
269        public boolean isCaretDecodingEnabled() {
270                return caretDecodingEnabled;
271        }
272
273        /**
274         * <p>
275         * Sets whether the reader will decode parameter values that use circumflex
276         * accent encoding (enabled by default). This escaping mechanism allows
277         * newlines and double quotes to be included in parameter values.
278         * </p>
279         * 
280         * <table border="1">
281         * <tr>
282         * <th>Raw Character</th>
283         * <th>Encoded Character</th>
284         * </tr>
285         * <tr>
286         * <td>{@code "}</td>
287         * <td>{@code ^'}</td>
288         * </tr>
289         * <tr>
290         * <td><i>newline</i></td>
291         * <td>{@code ^n}</td>
292         * </tr>
293         * <tr>
294         * <td>{@code ^}</td>
295         * <td>{@code ^^}</td>
296         * </tr>
297         * </table>
298         * 
299         * <p>
300         * Example:
301         * </p>
302         * 
303         * <pre>
304         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
305         *  sburgh, PA 15212":geo:40.446816,-80.00566
306         * </pre>
307         * 
308         * @param enable true to use circumflex accent decoding, false not to
309         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
310         */
311        public void setCaretDecodingEnabled(boolean enable) {
312                caretDecodingEnabled = enable;
313        }
314
315        /**
316         * Gets the character encoding of the reader.
317         * @return the character encoding or null if none is defined
318         */
319        public Charset getEncoding() {
320                return reader.getEncoding();
321        }
322
323        /**
324         * Closes the underlying {@link Reader} object.
325         */
326        public void close() throws IOException {
327                reader.close();
328        }
329}