package biweekly.io.text;

import static biweekly.util.StringUtils.ltrim;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

/*
 Copyright (c) 2013-2015, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met: 

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer. 
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution. 

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Reads lines of text from a reader, transparently unfolding lines that are
 * folded.
 * @author Michael Angstadt
 */
public class FoldedLineReader extends BufferedReader {
	/**
	 * Regular expression used to detect the first line of folded,
	 * "quoted-printable" property values. Compiled once and shared by all
	 * instances.
	 */
	private static final Pattern FOLDED_QUOTED_PRINTABLE_VALUE_REGEX = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);

	/**
	 * A line that was read while looking for continuation lines, but turned
	 * out to be the start of the next logical line. It is handed out by the
	 * next call to {@link #readLine()}.
	 */
	private String lastLine;

	private boolean singleSpaceFolding = true;

	/**
	 * The physical line number on which the most recently returned unfolded
	 * line started.
	 */
	private int lastLineNum = 0;

	/**
	 * The number of physical lines that have been read from the wrapped
	 * reader so far.
	 */
	private int lineCount = 0;

	private final Charset charset;

	/**
	 * Creates a folded line reader.
	 * @param reader the reader object to wrap
	 */
	public FoldedLineReader(Reader reader) {
		super(reader);

		if (reader instanceof InputStreamReader) {
			InputStreamReader isr = (InputStreamReader) reader;
			String charsetStr = isr.getEncoding();
			charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
		} else {
			//the encoding can only be discovered from an InputStreamReader
			charset = null;
		}
	}

	/**
	 * Creates a folded line reader.
	 * @param text the text to read
	 */
	public FoldedLineReader(String text) {
		this(new StringReader(text));
	}

	/**
	 * Sets whether the reader will only ignore the first whitespace character
	 * it encounters at the beginning of a folded line. This setting is enabled
	 * by default in order to support iCalendar files generated by Outlook.
	 * @param enabled true to enable (default), false to disable
	 */
	public void setSingleSpaceFoldingEnabled(boolean enabled) {
		singleSpaceFolding = enabled;
	}

	/**
	 * Gets whether the reader will only ignore the first whitespace character
	 * it encounters at the beginning of a folded line. This setting is enabled
	 * by default in order to support iCalendar files generated by Outlook.
	 * @return true if enabled (default), false if disabled
	 */
	public boolean isSingleSpaceFoldingEnabled() {
		return singleSpaceFolding;
	}

	/**
	 * Gets the starting line number of the last unfolded line that was read.
	 * @return the line number
	 */
	public int getLineNum() {
		return lastLineNum;
	}

	/**
	 * Gets the character encoding of the reader.
	 * @return the character encoding or null if none is defined
	 */
	public Charset getEncoding() {
		return charset;
	}

	/**
	 * Reads the next physical line from the wrapped reader and counts it.
	 * Every physical read goes through this method so that the line counter
	 * stays accurate no matter which kind of folding is being processed.
	 * @return the next line (possibly empty) or null if EOF
	 * @throws IOException if there's a problem reading from the reader
	 */
	private String readRawLine() throws IOException {
		String line = super.readLine();
		if (line != null) {
			lineCount++;
		}
		return line;
	}

	/**
	 * Reads the next non-empty line. Empty lines that are skipped are still
	 * counted as physical lines.
	 * @return the next non-empty line or null if EOF
	 * @throws IOException if there's a problem reading from the reader
	 */
	private String readNonEmptyLine() throws IOException {
		String line;
		do {
			line = readRawLine();
		} while (line != null && line.length() == 0);
		return line;
	}

	/**
	 * Reads the next unfolded line.
	 * @return the next unfolded line or null if the end of the stream has been
	 * reached
	 * @throws IOException if there's a problem reading from the reader
	 */
	@Override
	public String readLine() throws IOException {
		//use the line that was buffered by the previous call, if there is one
		String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
		lastLine = null;
		if (wholeLine == null) {
			//end of stream
			return null;
		}

		boolean foldedQuotedPrintableLine = FOLDED_QUOTED_PRINTABLE_VALUE_REGEX.matcher(wholeLine).matches();
		if (foldedQuotedPrintableLine) {
			//chop off the trailing "="
			wholeLine = chop(wholeLine);
		}

		/*
		 * "wholeLine" is the most recent physical line that was read, so the
		 * running counter is its line number.
		 */
		lastLineNum = lineCount;

		StringBuilder unfoldedLine = new StringBuilder(wholeLine);
		if (foldedQuotedPrintableLine) {
			appendQuotedPrintableContinuations(unfoldedLine);
		} else {
			appendFoldedContinuations(unfoldedLine);
		}
		return unfoldedLine.toString();
	}

	//@formatter:off
	/**
	 * Appends the continuation lines of a folded, "quoted-printable" property
	 * value.
	 * 
	 * <pre>
	 * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
	 * appended to the end of a line to signal that the next line is folded.
	 * Also, each folded line is *not* prepended with whitespace (which it
	 * should be, according to the 2.1 specs).
	 * 
	 * For example:
	 * 
	 * ------------
	 * BEGIN:VCARD
	 * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
	 * annoyingly formatted=0D=0A=
	 * note=
	 * 
	 * END:VCARD
	 * ------------
	 * 
	 * In the example above, note how there is an empty line directly above
	 * END. This is still part of the NOTE property value because the 3rd
	 * line of NOTE ends with a "=".
	 * 
	 * This behavior has only been observed in Outlook vCards. &gt;:(
	 * </pre>
	 * @param unfoldedLine the buffer to append the continuation lines to
	 * @throws IOException if there's a problem reading from the reader
	 */
	//@formatter:on
	private void appendQuotedPrintableContinuations(StringBuilder unfoldedLine) throws IOException {
		while (true) {
			//empty lines are significant here, so they must not be skipped
			String line = readRawLine();
			if (line == null) {
				//end of stream
				return;
			}

			//remove any folding whitespace
			line = ltrim(line);

			boolean endsInEquals = line.endsWith("=");
			if (endsInEquals) {
				//chop off the trailing "="
				line = chop(line);
			}

			unfoldedLine.append(line);

			if (!endsInEquals) {
				//end of the folded line
				return;
			}

			//there are more folded lines
		}
	}

	/**
	 * Appends the continuation lines of a line that is folded in the standard
	 * way (each continuation line begins with whitespace). The first line that
	 * is not a continuation line is saved so that the next call to
	 * {@link #readLine()} returns it.
	 * @param unfoldedLine the buffer to append the continuation lines to
	 * @throws IOException if there's a problem reading from the reader
	 */
	private void appendFoldedContinuations(StringBuilder unfoldedLine) throws IOException {
		while (true) {
			String line = readNonEmptyLine();
			if (line == null) {
				//end of stream
				return;
			}

			//"line" is never empty here, so charAt(0) is safe
			if (!Character.isWhitespace(line.charAt(0))) {
				//this is the start of the next logical line
				lastLine = line;
				return;
			}

			//the line is folded
			int lastWhitespace = 1;
			if (!singleSpaceFolding) {
				//strip the entire run of leading whitespace, not just the first character
				while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
					lastWhitespace++;
				}
			}
			unfoldedLine.append(line.substring(lastWhitespace));
		}
	}

	/**
	 * Removes the last character from a string.
	 * @param string the string
	 * @return the modified string (an empty string is returned unchanged)
	 */
	private static String chop(String string) {
		return (string.length() > 0) ? string.substring(0, string.length() - 1) : string;
	}
}