package biweekly.io.text;

import static biweekly.util.StringUtils.NEWLINE;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import biweekly.ICalVersion;
import biweekly.parameter.ICalParameters;
import biweekly.util.StringUtils;

/*
 Copyright (c) 2013-2015, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Parses the components out of each line in a plain-text iCalendar data stream.
 * @author Michael Angstadt
 * @see <a href="http://www.imc.org/pdi/pdiproddev.html">1.0 specs</a>
 * @see <a href="https://tools.ietf.org/html/rfc2445">RFC 2445</a>
 * @see <a href="http://tools.ietf.org/html/rfc5545">RFC 5545</a>
 */
public class ICalRawReader implements Closeable {
    private final FoldedLineReader reader;

    /**
     * The stack of components (upper-cased names) that are currently open,
     * maintained from the BEGIN and END lines that have been read so far.
     */
    private final List<String> components = new ArrayList<String>();

    private boolean caretDecodingEnabled = true;
    private ICalVersion version = null;

    /**
     * @param reader the reader to wrap
     */
    public ICalRawReader(Reader reader) {
        this.reader = new FoldedLineReader(reader);
    }

    /**
     * Gets the line number of the last line that was read.
     * @return the line number
     */
    public int getLineNum() {
        return reader.getLineNum();
    }

    /**
     * Gets the iCalendar version that the reader is currently parsing with.
     * @return the iCalendar version or null if unknown
     */
    public ICalVersion getVersion() {
        return version;
    }

    /**
     * <p>
     * Parses the next line of the iCalendar file.
     * </p>
     * <p>
     * A VERSION property that sits directly under the VCALENDAR component and
     * holds a recognized version is consumed by the reader (it updates
     * {@link #getVersion()}) and is <b>not</b> returned; the line following it
     * is returned instead.
     * </p>
     * @return the next line or null if there are no more lines
     * @throws ICalParseException if a line cannot be parsed
     * @throws IOException if there's a problem reading from the input stream
     */
    public ICalRawLine readLine() throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }

        String propertyName = null;
        ICalParameters parameters = new ICalParameters();
        String value = null;

        char escapeChar = 0; //the pending escape character, or 0 if the next char is not escaped
        boolean inQuotes = false; //are we inside of double quotes?
        StringBuilder buffer = new StringBuilder();
        String curParamName = null;
        for (int i = 0; i < line.length(); i++) {
            char ch = line.charAt(i);

            if (escapeChar != 0) {
                //this character was escaped
                if (escapeChar == '\\') {
                    //backslash escaping in parameter values is not part of the standard
                    if (ch == '\\') {
                        buffer.append(ch);
                    } else if (ch == 'n' || ch == 'N') {
                        //newlines
                        buffer.append(NEWLINE);
                    } else if (ch == '"' && version != ICalVersion.V1_0) {
                        //in case a double quote is escaped with a backslash
                        buffer.append(ch);
                    } else if (ch == ';' && version == ICalVersion.V1_0) {
                        //semi-colons can only be escaped in 1.0 parameter values
                        //if a 2.0 param value has semi-colons, the value should be surrounded in double quotes
                        buffer.append(ch);
                    } else {
                        //treat the escape character as a normal character because it's not a valid escape sequence
                        buffer.append(escapeChar).append(ch);
                    }
                } else if (escapeChar == '^') {
                    if (ch == '^') {
                        buffer.append(ch);
                    } else if (ch == 'n') {
                        buffer.append(NEWLINE);
                    } else if (ch == '\'') {
                        buffer.append('"');
                    } else {
                        //treat the escape character as a normal character because it's not a valid escape sequence
                        buffer.append(escapeChar).append(ch);
                    }
                }
                escapeChar = 0;
                continue;
            }

            if (ch == '\\' || (ch == '^' && version != ICalVersion.V1_0 && caretDecodingEnabled)) {
                //an escape character was read
                escapeChar = ch;
                continue;
            }

            if ((ch == ';' || ch == ':') && !inQuotes) {
                if (propertyName == null) {
                    //property name
                    propertyName = buffer.toString();
                } else {
                    //parameter value
                    String paramValue = buffer.toString();
                    if (version == ICalVersion.V1_0) {
                        //1.0 allows whitespace to surround the "=", so remove it
                        paramValue = StringUtils.ltrim(paramValue);
                    }
                    parameters.put(curParamName, paramValue);
                    curParamName = null;
                }
                buffer.setLength(0);

                if (ch == ':') {
                    //the rest of the line is the property value
                    //(substring() returns an empty string when the colon is the last character)
                    value = line.substring(i + 1);
                    break;
                }
                continue;
            }

            if (ch == ',' && !inQuotes && version != ICalVersion.V1_0) {
                //multi-valued parameter
                parameters.put(curParamName, buffer.toString());
                buffer.setLength(0);
                continue;
            }

            if (ch == '=' && curParamName == null) {
                //parameter name
                curParamName = buffer.toString();
                if (version == ICalVersion.V1_0) {
                    //1.0 allows whitespace to surround the "=", so remove it
                    curParamName = StringUtils.rtrim(curParamName);
                }
                buffer.setLength(0);
                continue;
            }

            if (ch == '"' && version != ICalVersion.V1_0) {
                //1.0 doesn't use the quoting mechanism
                inQuotes = !inQuotes;
                continue;
            }

            buffer.append(ch);
        }

        if (propertyName == null || value == null) {
            //the line has no colon, so it has no property name and/or no value
            throw new ICalParseException(line);
        }

        if ("BEGIN".equalsIgnoreCase(propertyName)) {
            //use a fixed locale so the stored name does not depend on the JVM's default locale
            components.add(value.toUpperCase(Locale.ROOT));
        } else if ("END".equalsIgnoreCase(propertyName)) {
            //close the most recently opened component with this name, along with anything nested inside of it
            int index = components.lastIndexOf(value.toUpperCase(Locale.ROOT));
            if (index >= 0) {
                components.subList(index, components.size()).clear();
            }
        } else if ("VERSION".equalsIgnoreCase(propertyName) && isUnderVCalendar()) {
            //only look at VERSION properties that are directly under the VCALENDAR component
            ICalVersion parsedVersion = ICalVersion.get(value);
            if (parsedVersion != null) {
                //if the value is a valid version, then skip this property and parse the next
                this.version = parsedVersion;
                return readLine();
            }
        }

        return new ICalRawLine(propertyName, parameters, value);
    }

    /**
     * Determines whether the innermost open component is a VCALENDAR
     * component, and that it is the only VCALENDAR component on the stack.
     * @return true if the reader is directly under a single VCALENDAR
     * component, false if not
     */
    private boolean isUnderVCalendar() {
        int firstIndex = components.indexOf("VCALENDAR");
        if (firstIndex < 0) {
            return false;
        }

        int lastIndex = components.lastIndexOf("VCALENDAR");
        return firstIndex == lastIndex && firstIndex == components.size() - 1;
    }

    /**
     * <p>
     * Gets whether the reader will decode parameter values that use circumflex
     * accent encoding (enabled by default). This escaping mechanism allows
     * newlines and double quotes to be included in parameter values.
     * </p>
     *
     * <table border="1">
     * <tr>
     * <th>Raw Character</th>
     * <th>Encoded Character</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
     *  sburgh, PA 15212":40.446816;-80.00566
     * </pre>
     *
     * @return true if circumflex accent decoding is enabled, false if not
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public boolean isCaretDecodingEnabled() {
        return caretDecodingEnabled;
    }

    /**
     * <p>
     * Sets whether the reader will decode parameter values that use circumflex
     * accent encoding (enabled by default). This escaping mechanism allows
     * newlines and double quotes to be included in parameter values.
     * </p>
     *
     * <table border="1">
     * <tr>
     * <th>Raw Character</th>
     * <th>Encoded Character</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
     *  sburgh, PA 15212":40.446816;-80.00566
     * </pre>
     *
     * @param enable true to use circumflex accent decoding, false not to
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public void setCaretDecodingEnabled(boolean enable) {
        caretDecodingEnabled = enable;
    }

    /**
     * Gets the character encoding of the reader.
     * @return the character encoding or null if none is defined
     */
    public Charset getEncoding() {
        return reader.getEncoding();
    }

    /**
     * Closes the underlying {@link Reader} object.
     */
    public void close() throws IOException {
        reader.close();
    }
}