001package biweekly.io.text;
002
003import static biweekly.util.StringUtils.ltrim;
004
005import java.io.BufferedReader;
006import java.io.IOException;
007import java.io.InputStreamReader;
008import java.io.Reader;
009import java.io.StringReader;
010import java.nio.charset.Charset;
011import java.util.regex.Pattern;
012
013/*
014 Copyright (c) 2013-2015, Michael Angstadt
015 All rights reserved.
016
017 Redistribution and use in source and binary forms, with or without
018 modification, are permitted provided that the following conditions are met: 
019
020 1. Redistributions of source code must retain the above copyright notice, this
021 list of conditions and the following disclaimer. 
022 2. Redistributions in binary form must reproduce the above copyright notice,
023 this list of conditions and the following disclaimer in the documentation
024 and/or other materials provided with the distribution. 
025
026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
027 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
028 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
029 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
030 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
031 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
032 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
033 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
034 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
035 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
036 */
037
038/**
039 * Reads lines of text from a reader, transparently unfolding lines that are
040 * folded.
041 * @author Michael Angstadt
042 */
043public class FoldedLineReader extends BufferedReader {
044        /**
045         * Regular expression used to detect the first line of folded,
046         * "quoted-printable" property values.
047         */
048        private final Pattern foldedQuotedPrintableValueRegex = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);
049
050        private String lastLine;
051        private boolean singleSpaceFolding = true;
052        private int lastLineNum = 0, lineCount = 0;
053        private final Charset charset;
054
055        /**
056         * Creates a folded line reader.
057         * @param reader the reader object to wrap
058         */
059        public FoldedLineReader(Reader reader) {
060                super(reader);
061
062                if (reader instanceof InputStreamReader) {
063                        InputStreamReader isr = (InputStreamReader) reader;
064                        String charsetStr = isr.getEncoding();
065                        charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
066                } else {
067                        charset = null;
068                }
069        }
070
071        /**
072         * Creates a folded line reader.
073         * @param text the text to read
074         */
075        public FoldedLineReader(String text) {
076                this(new StringReader(text));
077        }
078
079        /**
080         * Sets whether the reader will only ignore the first whitespace character
081         * it encounters at the beginning of a folded line. This setting is enabled
082         * by default in order to support iCalendar files generated by Outlook.
083         * @param enabled true to enable (default), false to disable
084         */
085        public void setSingleSpaceFoldingEnabled(boolean enabled) {
086                singleSpaceFolding = enabled;
087        }
088
089        /**
090         * Gets whether the reader will only ignore the first whitespace character
091         * it encounters at the beginning of a folded line. This setting is enabled
092         * by default in order to support iCalendar files generated by Outlook.
093         * @return true if enabled (default), false if disabled
094         */
095        public boolean isSingleSpaceFoldingEnabled() {
096                return singleSpaceFolding;
097        }
098
099        /**
100         * Gets the starting line number of the last unfolded line that was read.
101         * @return the line number
102         */
103        public int getLineNum() {
104                return lastLineNum;
105        }
106
107        /**
108         * Gets the character encoding of the reader.
109         * @return the character encoding or null if none is defined
110         */
111        public Charset getEncoding() {
112                return charset;
113        }
114
115        /**
116         * Reads the next non-empty line.
117         * @return the next non-empty line or null of EOF
118         * @throws IOException if there's a problem reading from the reader
119         */
120        private String readNonEmptyLine() throws IOException {
121                while (true) {
122                        String line = super.readLine();
123                        if (line == null) {
124                                return null;
125                        }
126
127                        lineCount++;
128                        if (line.length() > 0) {
129                                return line;
130                        }
131                }
132        }
133
134        /**
135         * Reads the next unfolded line.
136         * @return the next unfolded line or null if the end of the stream has been
137         * reached
138         * @throws IOException if there's a problem reading from the reader
139         */
140        @Override
141        public String readLine() throws IOException {
142                String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
143                lastLine = null;
144                if (wholeLine == null) {
145                        //end of stream
146                        return null;
147                }
148
149                //@formatter:off
150                /*
151                 * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
152                 * appended to the end of a line to signal that the next line is folded.
153                 * Also, each folded line is *not* prepend with whitespace (which it should
154                 * be, according to the 2.1 specs).
155                 * 
156                 * For example:
157                 * 
158                 * ------------
159                 * BEGIN:VCARD
160                 * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
161                 * annoyingly formatted=0D=0A=
162                 * note=
163                 * 
164                 * END:VCARD
165                 * ------------
166                 * 
167                 * In the example above, note how there is an empty line directly above
168                 * END. This is still part of the NOTE property value because the 3rd
169                 * line of NOTE ends with a "=".
170                 * 
171                 * This behavior has only been observed in Outlook vCards. >:(
172                 */
173                //@formatter:on
174
175                boolean foldedQuotedPrintableLine = foldedQuotedPrintableValueRegex.matcher(wholeLine).matches();
176                if (foldedQuotedPrintableLine) {
177                        //chop off the trailing "="
178                        wholeLine = chop(wholeLine);
179                }
180
181                lastLineNum = lineCount;
182                StringBuilder unfoldedLine = new StringBuilder(wholeLine);
183                while (true) {
184                        String line = foldedQuotedPrintableLine ? super.readLine() : readNonEmptyLine();
185                        if (line == null) {
186                                //end of stream
187                                break;
188                        }
189
190                        if (foldedQuotedPrintableLine) {
191                                //remove any folding whitespace
192                                line = ltrim(line);
193
194                                boolean endsInEquals = line.endsWith("=");
195                                if (endsInEquals) {
196                                        //chop off the trailing "="
197                                        line = chop(line);
198                                }
199
200                                unfoldedLine.append(line);
201
202                                if (endsInEquals) {
203                                        //there are more folded lines
204                                        continue;
205                                }
206
207                                //end of the folded line
208                                break;
209                        }
210
211                        if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
212                                //the line is folded
213
214                                int lastWhitespace = 1;
215                                if (!singleSpaceFolding) {
216                                        while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
217                                                lastWhitespace++;
218                                        }
219                                }
220                                unfoldedLine.append(line.substring(lastWhitespace));
221
222                                continue;
223                        }
224
225                        lastLine = line;
226                        break;
227                }
228
229                return unfoldedLine.toString();
230        }
231
232        /**
233         * Removes the last character from a string.
234         * @param string the string
235         * @return the modified string
236         */
237        private static String chop(String string) {
238                return (string.length() > 0) ? string.substring(0, string.length() - 1) : string;
239        }
240}