package biweekly.io.text;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

import biweekly.util.StringUtils;

/*
 Copyright (c) 2013-2015, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Automatically unfolds lines of text as they are read.
 * <p>
 * Each call to {@link #readLine()} returns one complete, unfolded line.
 * Because a line can only be recognized as complete by looking at the line
 * that follows it, the reader keeps a one-line lookahead between calls.
 * </p>
 * @author Michael Angstadt
 */
public class FoldedLineReader extends BufferedReader {
    /**
     * Regular expression used to detect "quoted-printable" property values
     * whose first line ends with the "=" soft line break marker.
     */
    private static final Pattern FOLDED_QUOTED_PRINTABLE_VALUE_REGEX = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);

    /**
     * Lookahead line: the first line of the NEXT unfolded line, which was
     * consumed while determining where the current one ends. Null if there
     * is no pending lookahead.
     */
    private String lastLine;

    private boolean singleSpaceFolding = true;

    /**
     * Line number on which the most recently returned unfolded line started.
     */
    private int lastLineNum = 0;

    /**
     * Number of physical lines consumed from the wrapped reader so far.
     */
    private int lineCount = 0;

    private final Charset charset;

    /**
     * Creates a folded line reader.
     * @param reader the reader object to wrap
     */
    public FoldedLineReader(Reader reader) {
        super(reader);
        if (reader instanceof InputStreamReader) {
            //only InputStreamReader exposes the encoding it decodes with
            InputStreamReader isr = (InputStreamReader) reader;
            String charsetStr = isr.getEncoding();
            charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
        } else {
            charset = null;
        }
    }

    /**
     * Creates a folded line reader.
     * @param text the text to read
     */
    public FoldedLineReader(String text) {
        this(new StringReader(text));
    }

    /**
     * Sets whether the reader will only ignore the first whitespace character
     * it encounters at the beginning of a folded line. This setting is enabled
     * by default in order to support iCalendar files generated by Outlook.
     * When disabled, all leading whitespace characters of a folded line are
     * ignored.
     * @param enabled true to enable (default), false to disable
     */
    public void setSingleSpaceFoldingEnabled(boolean enabled) {
        singleSpaceFolding = enabled;
    }

    /**
     * Gets whether the reader will only ignore the first whitespace character
     * it encounters at the beginning of a folded line. This setting is enabled
     * by default in order to support iCalendar files generated by Outlook.
     * @return true if enabled (default), false if disabled
     */
    public boolean isSingleSpaceFoldingEnabled() {
        return singleSpaceFolding;
    }

    /**
     * Gets the starting line number of the last unfolded line that was read.
     * Every physical line consumed from the wrapped reader is counted,
     * including empty lines and the continuation lines of folded values.
     * @return the line number (the first line is line 1), or 0 if nothing
     * has been read yet
     */
    public int getLineNum() {
        return lastLineNum;
    }

    /**
     * Gets the character encoding of the reader.
     * @return the character encoding or null if none is defined
     */
    public Charset getEncoding() {
        return charset;
    }

    /**
     * Reads the next physical line from the wrapped reader, keeping the
     * line counter in sync. All reads must go through this method so that
     * {@link #getLineNum()} stays accurate.
     * @return the next physical line (may be empty) or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    private String readRawLine() throws IOException {
        String line = super.readLine();
        if (line != null) {
            lineCount++;
        }
        return line;
    }

    /**
     * Reads the next non-empty line. Empty lines must be ignored because some
     * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
     * between folded lines, which, if not ignored, will cause the parser to
     * incorrectly parse the vCard.
     * @return the next non-empty line or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    private String readNonEmptyLine() throws IOException {
        String line;
        do {
            line = readRawLine();
        } while (line != null && line.length() == 0);
        return line;
    }

    /**
     * Reads the next unfolded line.
     * @return the next unfolded line or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    @Override
    public String readLine() throws IOException {
        //start with the lookahead line from the previous call, if there is one
        String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
        lastLine = null;
        if (wholeLine == null) {
            //end of stream
            return null;
        }

        //@formatter:off
        /*
         * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
         * appended to the end of a line to signal that the next line is folded.
         * Also, each folded line is not prepended with whitespace.
         *
         * For example:
         *
         * ------------
         * BEGIN:VCARD
         * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
         * annoyingly formatted=0D=0A=
         * note=
         *
         * END:VCARD
         * ------------
         *
         * In the example above, note how there is an empty line directly above
         * END. This is still part of the NOTE property value because the 3rd
         * line of NOTE ends with a "=".
         *
         * This behavior has only been observed in Outlook vCards.
         */
        //@formatter:on

        boolean foldedQuotedPrintableLine = false;
        if (FOLDED_QUOTED_PRINTABLE_VALUE_REGEX.matcher(wholeLine).matches()) {
            foldedQuotedPrintableLine = true;

            //chop off the trailing "="
            wholeLine = wholeLine.substring(0, wholeLine.length() - 1);
        }

        /*
         * "wholeLine" is the most recently consumed physical line at this
         * point (either just read, or read as lookahead at the very end of
         * the previous call), so the counter holds its line number.
         */
        lastLineNum = lineCount;

        StringBuilder unfoldedLine = new StringBuilder(wholeLine);
        while (true) {
            /*
             * Empty lines are significant inside a folded quoted-printable
             * value (see above), so they must not be skipped there. The raw
             * read still goes through the counting helper; calling
             * super.readLine() directly here would leave these lines
             * uncounted and skew the line numbers of everything after them.
             */
            String line = foldedQuotedPrintableLine ? readRawLine() : readNonEmptyLine();
            if (line == null) {
                //end of stream
                break;
            }

            if (foldedQuotedPrintableLine) {
                line = StringUtils.ltrim(line);

                boolean endsInEquals = line.endsWith("=");
                if (endsInEquals) {
                    //chop off the trailing "="
                    line = line.substring(0, line.length() - 1);
                }

                unfoldedLine.append(line);

                if (!endsInEquals) {
                    //end of the folded line
                    break;
                }

                //there are more folded lines
                continue;
            }

            if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
                //the line is folded

                //always skip the first whitespace character
                int lastWhitespace = 1;
                if (!singleSpaceFolding) {
                    //skip the rest of the leading whitespace too
                    while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
                        lastWhitespace++;
                    }
                }
                unfoldedLine.append(line.substring(lastWhitespace));

                continue;
            }

            //this line starts the next unfolded line; save it for the next call
            lastLine = line;
            break;
        }

        return unfoldedLine.toString();
    }
}