package biweekly.io.text;

import static biweekly.util.StringUtils.NEWLINE;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import biweekly.ICalVersion;
import biweekly.parameter.ICalParameters;
import biweekly.util.StringUtils;

/*
 Copyright (c) 2013-2015, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met: 

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer. 
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution. 

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Parses an iCalendar data stream, one unfolded line at a time. Each line is
 * split into a property name, a set of parameters, and a raw (undecoded)
 * property value. The reader also tracks the BEGIN/END component hierarchy so
 * that it can detect the VERSION property of the VCALENDAR component and
 * adjust its parsing rules accordingly.
 * @author Michael Angstadt
 * @see <a href="http://tools.ietf.org/html/rfc5545">RFC 5545</a>
 */
public class ICalRawReader implements Closeable {
	private final FoldedLineReader reader;

	//names (upper-cased) of the components that are currently open, outermost first
	private final List<String> components = new ArrayList<String>();

	private boolean caretDecodingEnabled = true;
	private ICalVersion version = null;

	/**
	 * Creates a new reader.
	 * @param reader the reader to the data stream
	 */
	public ICalRawReader(Reader reader) {
		this.reader = new FoldedLineReader(reader);
	}

	/**
	 * Gets the line number of the last line that was read.
	 * @return the line number
	 */
	public int getLineNum() {
		return reader.getLineNum();
	}

	/**
	 * Gets the iCalendar version that the reader is currently parsing with.
	 * @return the iCalendar version or null if unknown
	 */
	public ICalVersion getVersion() {
		return version;
	}

	/**
	 * Parses the next line of the iCalendar file. VERSION properties that sit
	 * directly under the VCALENDAR component and hold a recognized version are
	 * consumed by the reader (see {@link #getVersion()}) and are not returned;
	 * the reader moves on to the line that follows them.
	 * @return the next line or null if there are no more lines
	 * @throws ICalParseException if a line cannot be parsed
	 * @throws IOException if there's a problem reading from the input stream
	 */
	public ICalRawLine readLine() throws IOException {
		//loop (instead of recursing) so that a long run of VERSION lines cannot exhaust the call stack
		String line;
		while ((line = reader.readLine()) != null) {
			String propertyName = null;
			ICalParameters parameters = new ICalParameters();
			String value = null;

			char escapeChar = 0; //is the next char escaped?
			boolean inQuotes = false; //are we inside of double quotes?
			StringBuilder buffer = new StringBuilder();
			String curParamName = null;
			for (int i = 0; i < line.length(); i++) {
				char ch = line.charAt(i);

				if (escapeChar != 0) {
					//this character was escaped
					if (escapeChar == '\\') {
						appendBackslashEscape(ch, buffer);
					} else if (escapeChar == '^') {
						appendCaretEscape(ch, buffer);
					}
					escapeChar = 0;
					continue;
				}

				if (ch == '\\' || (ch == '^' && version != ICalVersion.V1_0 && caretDecodingEnabled)) {
					//an escape character was read
					escapeChar = ch;
					continue;
				}

				if ((ch == ';' || ch == ':') && !inQuotes) {
					if (propertyName == null) {
						//property name
						propertyName = buffer.toString();
					} else {
						//parameter value
						String paramValue = buffer.toString();
						if (version == ICalVersion.V1_0) {
							//1.0 allows whitespace to surround the "=", so remove it
							paramValue = StringUtils.ltrim(paramValue);
						}
						parameters.put(curParamName, paramValue);
						curParamName = null;
					}
					buffer.setLength(0);

					if (ch == ':') {
						//the rest of the line is the property value
						if (i < line.length() - 1) {
							value = line.substring(i + 1);
						} else {
							value = "";
						}
						break;
					}
					continue;
				}

				if (ch == ',' && !inQuotes && version != ICalVersion.V1_0) {
					//multi-valued parameter
					parameters.put(curParamName, buffer.toString());
					buffer.setLength(0);
					continue;
				}

				if (ch == '=' && curParamName == null) {
					//parameter name
					curParamName = buffer.toString();
					if (version == ICalVersion.V1_0) {
						//1.0 allows whitespace to surround the "=", so remove it
						curParamName = StringUtils.rtrim(curParamName);
					}
					buffer.setLength(0);
					continue;
				}

				if (ch == '"' && version != ICalVersion.V1_0) {
					//1.0 doesn't use the quoting mechanism
					inQuotes = !inQuotes;
					continue;
				}

				buffer.append(ch);
			}

			if (propertyName == null || value == null) {
				//the line has no colon, so it has no property value
				throw new ICalParseException(line);
			}

			if ("BEGIN".equalsIgnoreCase(propertyName)) {
				//use a fixed locale so that the casing rules do not depend on the JVM's default locale
				components.add(value.toUpperCase(Locale.ROOT));
			} else if ("END".equalsIgnoreCase(propertyName)) {
				//close the most recently opened component with this name, along with everything nested inside of it
				int index = components.lastIndexOf(value.toUpperCase(Locale.ROOT));
				if (index >= 0) {
					components.subList(index, components.size()).clear();
				}
			} else if ("VERSION".equalsIgnoreCase(propertyName) && isUnderVCalendar()) {
				//only look at VERSION properties that are directly under the VCALENDAR component
				ICalVersion parsedVersion = ICalVersion.get(value);
				if (parsedVersion != null) {
					//if the value is a valid version, then skip this property and parse the next
					this.version = parsedVersion;
					continue;
				}
			}

			return new ICalRawLine(propertyName, parameters, value);
		}

		return null;
	}

	/**
	 * Appends the decoded form of a backslash escape sequence to a buffer.
	 * Backslash escaping in parameter values is not part of the standard, but
	 * is tolerated by this reader.
	 * @param ch the character that came after the backslash
	 * @param buffer the buffer to append the decoded character(s) to
	 */
	private void appendBackslashEscape(char ch, StringBuilder buffer) {
		if (ch == '\\') {
			buffer.append(ch);
		} else if (ch == 'n' || ch == 'N') {
			//newlines
			buffer.append(NEWLINE);
		} else if (ch == '"' && version != ICalVersion.V1_0) {
			//in case a double quote is escaped with a backslash
			buffer.append(ch);
		} else if (ch == ';' && version == ICalVersion.V1_0) {
			//semi-colons can only be escaped in 1.0 parameter values
			//if a 2.0 param value has semi-colons, the value should be surrounded in double quotes
			buffer.append(ch);
		} else {
			//treat the escape character as a normal character because it's not a valid escape sequence
			buffer.append('\\').append(ch);
		}
	}

	/**
	 * Appends the decoded form of a circumflex accent escape sequence to a
	 * buffer.
	 * @param ch the character that came after the circumflex accent
	 * @param buffer the buffer to append the decoded character(s) to
	 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
	 */
	private void appendCaretEscape(char ch, StringBuilder buffer) {
		if (ch == '^') {
			buffer.append(ch);
		} else if (ch == 'n') {
			buffer.append(NEWLINE);
		} else if (ch == '\'') {
			buffer.append('"');
		} else {
			//treat the escape character as a normal character because it's not a valid escape sequence
			buffer.append('^').append(ch);
		}
	}

	/**
	 * Determines if the reader is currently positioned directly inside of a
	 * VCALENDAR component: exactly one VCALENDAR component is open and it is
	 * the innermost open component.
	 * @return true if the current parent component is the one and only open
	 * VCALENDAR component, false if not
	 */
	private boolean isUnderVCalendar() {
		int firstIndex = components.indexOf("VCALENDAR");
		if (firstIndex < 0) {
			return false;
		}

		int lastIndex = components.lastIndexOf("VCALENDAR");
		return firstIndex == lastIndex && firstIndex == components.size() - 1;
	}

	/**
	 * <p>
	 * Gets whether the reader will decode parameter values that use circumflex
	 * accent encoding (enabled by default). This escaping mechanism allows
	 * newlines and double quotes to be included in parameter values.
	 * </p>
	 * 
	 * <table border="1">
	 * <tr>
	 * <th>Raw Character</th>
	 * <th>Encoded Character</th>
	 * </tr>
	 * <tr>
	 * <td>{@code "}</td>
	 * <td>{@code ^'}</td>
	 * </tr>
	 * <tr>
	 * <td><i>newline</i></td>
	 * <td>{@code ^n}</td>
	 * </tr>
	 * <tr>
	 * <td>{@code ^}</td>
	 * <td>{@code ^^}</td>
	 * </tr>
	 * </table>
	 * 
	 * <p>
	 * Example:
	 * </p>
	 * 
	 * <pre>
	 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
	 *  sburgh, PA 15212":40.446816;-80.00566
	 * </pre>
	 * 
	 * @return true if circumflex accent decoding is enabled, false if not
	 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
	 */
	public boolean isCaretDecodingEnabled() {
		return caretDecodingEnabled;
	}

	/**
	 * <p>
	 * Sets whether the reader will decode parameter values that use circumflex
	 * accent encoding (enabled by default). This escaping mechanism allows
	 * newlines and double quotes to be included in parameter values.
	 * </p>
	 * 
	 * <table border="1">
	 * <tr>
	 * <th>Raw Character</th>
	 * <th>Encoded Character</th>
	 * </tr>
	 * <tr>
	 * <td>{@code "}</td>
	 * <td>{@code ^'}</td>
	 * </tr>
	 * <tr>
	 * <td><i>newline</i></td>
	 * <td>{@code ^n}</td>
	 * </tr>
	 * <tr>
	 * <td>{@code ^}</td>
	 * <td>{@code ^^}</td>
	 * </tr>
	 * </table>
	 * 
	 * <p>
	 * Example:
	 * </p>
	 * 
	 * <pre>
	 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
	 *  sburgh, PA 15212":40.446816;-80.00566
	 * </pre>
	 * 
	 * @param enable true to use circumflex accent decoding, false not to
	 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
	 */
	public void setCaretDecodingEnabled(boolean enable) {
		caretDecodingEnabled = enable;
	}

	/**
	 * Gets the character encoding of the reader.
	 * @return the character encoding or null if none is defined
	 */
	public Charset getEncoding() {
		return reader.getEncoding();
	}

	/**
	 * Closes the underlying {@link Reader} object.
	 * @throws IOException if there's a problem closing the reader
	 */
	public void close() throws IOException {
		reader.close();
	}
}