001package biweekly.io.text;
002
003import java.io.BufferedReader;
004import java.io.IOException;
005import java.io.InputStreamReader;
006import java.io.Reader;
007import java.io.StringReader;
008import java.nio.charset.Charset;
009import java.util.regex.Pattern;
010
011import biweekly.util.StringUtils;
012
013/*
014 Copyright (c) 2013-2015, Michael Angstadt
015 All rights reserved.
016
017 Redistribution and use in source and binary forms, with or without
018 modification, are permitted provided that the following conditions are met: 
019
020 1. Redistributions of source code must retain the above copyright notice, this
021 list of conditions and the following disclaimer. 
022 2. Redistributions in binary form must reproduce the above copyright notice,
023 this list of conditions and the following disclaimer in the documentation
024 and/or other materials provided with the distribution. 
025
026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
027 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
028 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
029 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
030 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
031 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
032 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
033 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
034 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
035 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
036 */
037
038/**
039 * Automatically unfolds lines of text as they are read.
040 * @author Michael Angstadt
041 */
042public class FoldedLineReader extends BufferedReader {
043        /**
044         * Regular expression used to detect "quoted-printable" property values.
045         */
046        private static final Pattern foldedQuotedPrintableValueRegex = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);
047
048        private String lastLine;
049        private boolean singleSpaceFolding = true;
050        private int lastLineNum = 0, lineCount = 0;
051        private final Charset charset;
052
053        /**
054         * Creates a folded line reader.
055         * @param reader the reader object to wrap
056         */
057        public FoldedLineReader(Reader reader) {
058                super(reader);
059                if (reader instanceof InputStreamReader) {
060                        InputStreamReader isr = (InputStreamReader) reader;
061                        String charsetStr = isr.getEncoding();
062                        charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
063                } else {
064                        charset = null;
065                }
066        }
067
068        /**
069         * Creates a folded line reader.
070         * @param text the text to read
071         */
072        public FoldedLineReader(String text) {
073                this(new StringReader(text));
074        }
075
076        /**
077         * Sets whether the reader will only ignore the first whitespace character
078         * it encounters at the beginning of a folded line. This setting is enabled
079         * by default in order to support iCalendar files generated by Outlook.
080         * @param enabled true to enable (default), false to disable
081         */
082        public void setSingleSpaceFoldingEnabled(boolean enabled) {
083                singleSpaceFolding = enabled;
084        }
085
086        /**
087         * Gets whether the reader will only ignore the first whitespace character
088         * it encounters at the beginning of a folded line. This setting is enabled
089         * by default in order to support iCalendar files generated by Outlook.
090         * @return true if enabled (default), false if disabled
091         */
092        public boolean isSingleSpaceFoldingEnabled() {
093                return singleSpaceFolding;
094        }
095
096        /**
097         * Gets the starting line number of the last unfolded line that was read.
098         * @return the line number
099         */
100        public int getLineNum() {
101                return lastLineNum;
102        }
103
104        /**
105         * Gets the character encoding of the reader.
106         * @return the character encoding or null if none is defined
107         */
108        public Charset getEncoding() {
109                return charset;
110        }
111
112        /**
113         * Reads the next non-empty line. Empty lines must be ignored because some
114         * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
115         * between folded lines, which, if not ignored, will cause the parser to
116         * incorrectly parse the vCard.
117         * @return the next non-empty line or null of EOF
118         * @throws IOException if there's a problem reading from the reader
119         */
120        private String readNonEmptyLine() throws IOException {
121                String line;
122                do {
123                        line = super.readLine();
124                        if (line != null) {
125                                lineCount++;
126                        }
127                } while (line != null && line.length() == 0);
128                return line;
129        }
130
131        /**
132         * Reads the next unfolded line.
133         * @return the next unfolded line or null if EOF
134         * @throws IOException if there's a problem reading from the reader
135         */
136        @Override
137        public String readLine() throws IOException {
138                String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
139                lastLine = null;
140                if (wholeLine == null) {
141                        //end of stream
142                        return null;
143                }
144
145                //@formatter:off
146                /*
147                 * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
148                 * appended to the end of a line to signal that the next line is folded.
149                 * Also, each folded line is not prepend with whitespace.
150                 * 
151                 * For example:
152                 * 
153                 * ------------
154                 * BEGIN:VCARD
155                 * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
156                 * annoyingly formatted=0D=0A=
157                 * note=
158                 * 
159                 * END:VCARD
160                 * ------------
161                 * 
162                 * In the example above, note how there is an empty line directly above
163                 * END. This is still part of the NOTE property value because the 3rd
164                 * line of NOTE ends with a "=".
165                 * 
166                 * This behavior has only been observed in Outlook vCards.
167                 */
168                //@formatter:on
169
170                boolean foldedQuotedPrintableLine = false;
171                if (foldedQuotedPrintableValueRegex.matcher(wholeLine).matches()) {
172                        foldedQuotedPrintableLine = true;
173
174                        //chop off the trailing "="
175                        wholeLine = wholeLine.substring(0, wholeLine.length() - 1);
176                }
177
178                lastLineNum = lineCount;
179                StringBuilder unfoldedLine = new StringBuilder(wholeLine);
180                while (true) {
181                        String line = foldedQuotedPrintableLine ? super.readLine() : readNonEmptyLine();
182                        if (line == null) {
183                                //end of stream
184                                break;
185                        }
186
187                        if (foldedQuotedPrintableLine) {
188                                line = StringUtils.ltrim(line);
189
190                                boolean endsInEquals = line.endsWith("=");
191                                if (endsInEquals) {
192                                        //chop off the trailing "="
193                                        line = line.substring(0, line.length() - 1);
194                                }
195
196                                unfoldedLine.append(line);
197
198                                if (endsInEquals) {
199                                        //there are more folded lines
200                                        continue;
201                                } else {
202                                        //end of the folded line
203                                        break;
204                                }
205                        }
206
207                        if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
208                                //the line is folded
209
210                                int lastWhitespace = 1;
211                                if (!singleSpaceFolding) {
212                                        while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
213                                                lastWhitespace++;
214                                        }
215                                }
216                                unfoldedLine.append(line.substring(lastWhitespace));
217
218                                continue;
219                        }
220
221                        lastLine = line;
222                        break;
223                }
224
225                return unfoldedLine.toString();
226        }
227}