001package biweekly.io.text;
002
003import java.io.Closeable;
004import java.io.Flushable;
005import java.io.IOException;
006import java.io.Writer;
007import java.nio.charset.Charset;
008import java.util.BitSet;
009import java.util.Collections;
010import java.util.HashMap;
011import java.util.List;
012import java.util.Map;
013import java.util.regex.Pattern;
014
015import biweekly.ICalVersion;
016import biweekly.parameter.Encoding;
017import biweekly.parameter.ICalParameters;
018
019/*
020 Copyright (c) 2013-2015, Michael Angstadt
021 All rights reserved.
022
023 Redistribution and use in source and binary forms, with or without
024 modification, are permitted provided that the following conditions are met: 
025
026 1. Redistributions of source code must retain the above copyright notice, this
027 list of conditions and the following disclaimer. 
028 2. Redistributions in binary form must reproduce the above copyright notice,
029 this list of conditions and the following disclaimer in the documentation
030 and/or other materials provided with the distribution. 
031
032 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
033 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
034 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
035 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
036 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
037 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
038 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
039 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
040 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
041 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
042 */
043
044/**
045 * Writes data to an iCalendar data stream.
046 * @author Michael Angstadt
047 * @see <a href="http://tools.ietf.org/html/rfc5545">RFC 5545</a>
048 */
049public class ICalRawWriter implements Closeable, Flushable {
050        /**
051         * Regular expression used to determine if a parameter value needs to be
052         * quoted.
053         */
054        private static final Pattern quoteMeRegex = Pattern.compile(".*?[,:;].*");
055
056        /**
057         * Regular expression used to detect newline character sequences.
058         */
059        private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n");
060
061        /**
062         * Regular expression used to determine if a property name contains any
063         * invalid characters.
064         */
065        private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+");
066
067        /**
068         * The characters that are not valid in parameter values and that should be
069         * removed.
070         */
071        private static final Map<ICalVersion, BitSet> invalidParamValueChars;
072        static {
073                BitSet controlChars = new BitSet(128);
074                controlChars.set(0, 31);
075                controlChars.set(127);
076                controlChars.set('\t', false); //allow
077                controlChars.set('\n', false); //allow
078                controlChars.set('\r', false); //allow
079
080                Map<ICalVersion, BitSet> map = new HashMap<ICalVersion, BitSet>();
081
082                //1.0
083                {
084                        BitSet bitSet = new BitSet(128);
085                        bitSet.or(controlChars);
086
087                        bitSet.set(',');
088                        bitSet.set('.');
089                        bitSet.set(':');
090                        bitSet.set('=');
091                        bitSet.set('[');
092                        bitSet.set(']');
093
094                        map.put(ICalVersion.V1_0, bitSet);
095                }
096
097                //2.0
098                {
099                        BitSet bitSet = new BitSet(128);
100                        bitSet.or(controlChars);
101
102                        map.put(ICalVersion.V2_0_DEPRECATED, bitSet);
103                        map.put(ICalVersion.V2_0, bitSet);
104                }
105
106                invalidParamValueChars = Collections.unmodifiableMap(map);
107        }
108
109        private final FoldedLineWriter writer;
110        private boolean caretEncodingEnabled = false;
111        private ICalVersion version;
112
113        /**
114         * Creates an iCalendar raw writer.
115         * @param writer the writer to the data stream
116         * @param version the version to adhere to
117         */
118        public ICalRawWriter(Writer writer, ICalVersion version) {
119                this.writer = new FoldedLineWriter(writer);
120                this.version = version;
121        }
122
123        /**
124         * Gets the writer that this object wraps.
125         * @return the folded line writer
126         */
127        public FoldedLineWriter getFoldedLineWriter() {
128                return writer;
129        }
130
131        /**
132         * <p>
133         * Gets whether the writer will apply circumflex accent encoding on
134         * parameter values (disabled by default). This escaping mechanism allows
135         * for newlines and double quotes to be included in parameter values.
136         * </p>
137         * 
138         * <p>
139         * When disabled, the writer will replace newlines with spaces and double
140         * quotes with single quotes.
141         * </p>
142         * 
143         * <table border="1">
144         * <tr>
145         * <th>Character</th>
146         * <th>Replacement<br>
147         * (when disabled)</th>
148         * <th>Replacement<br>
149         * (when enabled)</th>
150         * </tr>
151         * <tr>
152         * <td>{@code "}</td>
153         * <td>{@code '}</td>
154         * <td>{@code ^'}</td>
155         * </tr>
156         * <tr>
157         * <td><i>newline</i></td>
158         * <td><code><i>space</i></code></td>
159         * <td>{@code ^n}</td>
160         * </tr>
161         * <tr>
162         * <td>{@code ^}</td>
163         * <td>{@code ^}</td>
164         * <td>{@code ^^}</td>
165         * </tr>
166         * </table>
167         * 
168         * <p>
169         * Example:
170         * </p>
171         * 
172         * <pre>
173         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
174         *  sburgh, PA 15212":40.446816;80.00566
175         * </pre>
176         * 
177         * @return true if circumflex accent encoding is enabled, false if not
178         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
179         */
180        public boolean isCaretEncodingEnabled() {
181                return caretEncodingEnabled;
182        }
183
184        /**
185         * <p>
186         * Sets whether the writer will apply circumflex accent encoding on
187         * parameter values (disabled by default). This escaping mechanism allows
188         * for newlines and double quotes to be included in parameter values.
189         * </p>
190         * 
191         * <p>
192         * When disabled, the writer will replace newlines with spaces and double
193         * quotes with single quotes.
194         * </p>
195         * 
196         * <table border="1">
197         * <tr>
198         * <th>Character</th>
199         * <th>Replacement<br>
200         * (when disabled)</th>
201         * <th>Replacement<br>
202         * (when enabled)</th>
203         * </tr>
204         * <tr>
205         * <td>{@code "}</td>
206         * <td>{@code '}</td>
207         * <td>{@code ^'}</td>
208         * </tr>
209         * <tr>
210         * <td><i>newline</i></td>
211         * <td><code><i>space</i></code></td>
212         * <td>{@code ^n}</td>
213         * </tr>
214         * <tr>
215         * <td>{@code ^}</td>
216         * <td>{@code ^}</td>
217         * <td>{@code ^^}</td>
218         * </tr>
219         * </table>
220         * 
221         * <p>
222         * Example:
223         * </p>
224         * 
225         * <pre>
226         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
227         *  sburgh, PA 15212":40.446816;80.00566
228         * </pre>
229         * 
230         * @param enable true to use circumflex accent encoding, false not to
231         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
232         */
233        public void setCaretEncodingEnabled(boolean enable) {
234                caretEncodingEnabled = enable;
235        }
236
237        /**
238         * Gets the iCalendar version that the writer is adhering to.
239         * @return the version
240         */
241        public ICalVersion getVersion() {
242                return version;
243        }
244
245        /**
246         * Sets the iCalendar version that the writer should adhere to.
247         * @param version the version
248         */
249        public void setVersion(ICalVersion version) {
250                this.version = version;
251        }
252
253        /**
254         * Writes a property marking the beginning of a component (in other words,
255         * writes a "BEGIN:NAME" property).
256         * @param componentName the component name (e.g. "VEVENT")
257         * @throws IOException if there's an I/O problem
258         */
259        public void writeBeginComponent(String componentName) throws IOException {
260                writeProperty("BEGIN", componentName);
261        }
262
263        /**
264         * Writes a property marking the end of a component (in other words, writes
265         * a "END:NAME" property).
266         * @param componentName the component name (e.g. "VEVENT")
267         * @throws IOException if there's an I/O problem
268         */
269        public void writeEndComponent(String componentName) throws IOException {
270                writeProperty("END", componentName);
271        }
272
273        /**
274         * Writes a "VERSION" property, based on the iCalendar version that the
275         * writer is adhering to.
276         * @throws IOException if there's an I/O problem
277         */
278        public void writeVersion() throws IOException {
279                writeProperty("VERSION", version.getVersion());
280        }
281
282        /**
283         * Writes a property to the iCalendar data stream.
284         * @param propertyName the property name (e.g. "VERSION")
285         * @param value the property value (e.g. "2.0")
286         * @throws IllegalArgumentException if the property name contains invalid
287         * characters
288         * @throws IOException if there's an I/O problem
289         */
290        public void writeProperty(String propertyName, String value) throws IOException {
291                writeProperty(propertyName, new ICalParameters(), value);
292        }
293
294        /**
295         * Writes a property to the iCalendar data stream.
296         * @param propertyName the property name (e.g. "VERSION")
297         * @param parameters the property parameters
298         * @param value the property value (e.g. "2.0")
299         * @throws IllegalArgumentException if the property name contains invalid
300         * characters
301         * @throws IOException if there's an I/O problem
302         */
303        public void writeProperty(String propertyName, ICalParameters parameters, String value) throws IOException {
304                //validate the property name
305                if (!propertyNameRegex.matcher(propertyName).matches()) {
306                        throw new IllegalArgumentException("Property name invalid.  Property names can only contain letters, numbers, and hyphens.");
307                }
308
309                value = sanitizeValue(parameters, value);
310
311                //determine if the property value must be encoded in quoted printable
312                //and determine the charset to use when encoding to quoted-printable
313                boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE);
314                Charset charset = null;
315                if (quotedPrintable) {
316                        String charsetParam = parameters.getCharset();
317                        if (charsetParam == null) {
318                                charset = Charset.forName("UTF-8");
319                        } else {
320                                try {
321                                        charset = Charset.forName(charsetParam);
322                                } catch (Throwable t) {
323                                        charset = Charset.forName("UTF-8");
324                                }
325                        }
326                        parameters.setCharset(charset.name());
327                }
328
329                //write the property name
330                writer.append(propertyName);
331
332                //write the parameters
333                for (Map.Entry<String, List<String>> subType : parameters) {
334                        String parameterName = subType.getKey();
335                        List<String> parameterValues = subType.getValue();
336                        if (parameterValues.isEmpty()) {
337                                continue;
338                        }
339
340                        if (version == ICalVersion.V1_0) {
341                                //e.g. ADR;FOO=bar;FOO=car:
342                                for (String parameterValue : parameterValues) {
343                                        parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
344                                        writer.append(';').append(parameterName).append('=').append(parameterValue);
345                                }
346                                continue;
347                        }
348
349                        //e.g. ADR;TYPE=home,work,"another,value":
350                        boolean first = true;
351                        writer.append(';').append(parameterName).append('=');
352                        for (String parameterValue : parameterValues) {
353                                if (!first) {
354                                        writer.append(',');
355                                }
356
357                                parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
358
359                                //surround with double quotes if contains special chars
360                                if (quoteMeRegex.matcher(parameterValue).matches()) {
361                                        writer.append('"');
362                                        writer.append(parameterValue);
363                                        writer.append('"');
364                                } else {
365                                        writer.append(parameterValue);
366                                }
367
368                                first = false;
369                        }
370                }
371
372                writer.append(':');
373
374                //write the property value
375                writer.append(value, quotedPrintable, charset);
376                writer.append(writer.getNewline());
377        }
378
379        /**
380         * Sanitizes a property value for safe inclusion in a vCard.
381         * @param parameters the parameters
382         * @param value the value to sanitize
383         * @return the sanitized value
384         */
385        private String sanitizeValue(ICalParameters parameters, String value) {
386                if (value == null) {
387                        return "";
388                }
389
390                if (version == ICalVersion.V1_0 && containsNewlines(value)) {
391                        //1.0 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs)
392                        parameters.setEncoding(Encoding.QUOTED_PRINTABLE);
393                        return value;
394                }
395
396                return escapeNewlines(value);
397        }
398
399        /**
400         * Removes or escapes all invalid characters in a parameter value.
401         * @param parameterValue the parameter value
402         * @param parameterName the parameter name
403         * @param propertyName the name of the property to which the parameter
404         * belongs
405         * @return the sanitized parameter value
406         */
407        private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) {
408                //remove invalid characters
409                parameterValue = removeInvalidParameterValueChars(parameterValue);
410
411                switch (version) {
412                case V1_0:
413                        //replace newlines with spaces
414                        parameterValue = newlineRegex.matcher(parameterValue).replaceAll(" ");
415
416                        //escape backslashes
417                        parameterValue = parameterValue.replace("\\", "\\\\");
418
419                        //escape semi-colons (see section 2)
420                        parameterValue = parameterValue.replace(";", "\\;");
421
422                        break;
423
424                default:
425                        if (caretEncodingEnabled) {
426                                //apply caret encoding
427                                parameterValue = applyCaretEncoding(parameterValue);
428                        } else {
429                                //replace double quotes with single quotes
430                                parameterValue = parameterValue.replace('"', '\'');
431
432                                //replace newlines with spaces
433                                parameterValue = newlineRegex.matcher(parameterValue).replaceAll(" ");
434                        }
435
436                        break;
437                }
438
439                return parameterValue;
440        }
441
442        /**
443         * Removes invalid characters from a parameter value.
444         * @param value the parameter value
445         * @return the sanitized parameter value
446         */
447        private String removeInvalidParameterValueChars(String value) {
448                BitSet invalidChars = invalidParamValueChars.get(version);
449                StringBuilder sb = null;
450
451                for (int i = 0; i < value.length(); i++) {
452                        char ch = value.charAt(i);
453                        if (invalidChars.get(ch)) {
454                                if (sb == null) {
455                                        sb = new StringBuilder(value.length());
456                                        sb.append(value.substring(0, i));
457                                }
458                                continue;
459                        }
460
461                        if (sb != null) {
462                                sb.append(ch);
463                        }
464                }
465
466                return (sb == null) ? value : sb.toString();
467        }
468
469        /**
470         * Applies circumflex accent encoding to a string.
471         * @param value the string
472         * @return the encoded string
473         */
474        private String applyCaretEncoding(String value) {
475                value = value.replace("^", "^^");
476                value = newlineRegex.matcher(value).replaceAll("^n");
477                value = value.replace("\"", "^'");
478                return value;
479        }
480
481        /**
482         * Escapes all newline characters.
483         * <p>
484         * This method escapes the following newline sequences:
485         * </p>
486         * <ul>
487         * <li>{@code \r\n}</li>
488         * <li>{@code \r}</li>
489         * <li>{@code \n}</li>
490         * </ul>
491         * @param text the text to escape
492         * @return the escaped text
493         */
494        private String escapeNewlines(String text) {
495                return newlineRegex.matcher(text).replaceAll("\\\\n");
496        }
497
498        /**
499         * <p>
500         * Determines if a string has at least one newline character sequence. The
501         * newline character sequences are:
502         * </p>
503         * <ul>
504         * <li>{@code \r\n}</li>
505         * <li>{@code \r}</li>
506         * <li>{@code \n}</li>
507         * </ul>
508         * @param text the text to escape
509         * @return the escaped text
510         */
511        private boolean containsNewlines(String text) {
512                return newlineRegex.matcher(text).find();
513        }
514
515        /**
516         * Flushes the underlying {@link Writer} object.
517         * @throws IOException if there's a problem flushing the writer
518         */
519        public void flush() throws IOException {
520                writer.flush();
521        }
522
523        /**
524         * Closes the underlying {@link Writer} object.
525         */
526        public void close() throws IOException {
527                writer.close();
528        }
529}