001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.io.Writer; 022import java.nio.ByteBuffer; 023import java.nio.CharBuffer; 024import java.nio.charset.Charset; 025import java.nio.charset.CharsetDecoder; 026import java.nio.charset.CoderResult; 027import java.nio.charset.CodingErrorAction; 028 029/** 030 * {@link OutputStream} implementation that transforms a byte stream to a 031 * character stream using a specified charset encoding and writes the resulting 032 * stream to a {@link Writer}. The stream is transformed using a 033 * {@link CharsetDecoder} object, guaranteeing that all charset 034 * encodings supported by the JRE are handled correctly. 035 * <p> 036 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. 037 * This implies that the data is written to the underlying {@link Writer} in chunks 038 * that are no larger than the size of this buffer. By default, the buffer is 039 * flushed only when it overflows or when {@link #flush()} or {@link #close()} 040 * is called. In general there is therefore no need to wrap the underlying {@link Writer} 041 * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also 042 * be instructed to flush the buffer after each write operation. In this case, all 043 * available data is written immediately to the underlying {@link Writer}, implying that 044 * the current position of the {@link Writer} is correlated to the current position 045 * of the {@link WriterOutputStream}. 046 * <p> 047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; 048 * in the following example, writing to {@code out2} would have the same result as writing to 049 * {@code out} directly (provided that the byte sequence is legal with respect to the 050 * charset encoding): 051 * <pre> 052 * OutputStream out = ... 053 * Charset cs = ... 054 * OutputStreamWriter writer = new OutputStreamWriter(out, cs); 055 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre> 056 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, 057 * except that the control flow is reversed: both classes transform a byte stream 058 * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, 059 * while {@link WriterOutputStream} pushes it to the underlying stream. 060 * <p> 061 * Note that while there are use cases where there is no alternative to using 062 * this class, very often the need to use this class is an indication of a flaw 063 * in the design of the code. This class is typically used in situations where an existing 064 * API only accepts an {@link OutputStream} object, but where the stream is known to represent 065 * character data that must be decoded for further use. 066 * </p> 067 * <p> 068 * Instances of {@link WriterOutputStream} are not thread safe. 069 * </p> 070 * 071 * @see org.apache.commons.io.input.ReaderInputStream 072 * @since 2.0 073 */ 074public class WriterOutputStream extends OutputStream { 075 private static final int BUFFER_SIZE = 1024; 076 077 private final Writer writer; 078 private final CharsetDecoder decoder; 079 private final boolean writeImmediately; 080 081 /** 082 * ByteBuffer used as input for the decoder. This buffer can be small 083 * as it is used only to transfer the received data to the 084 * decoder. 085 */ 086 private final ByteBuffer decoderIn = ByteBuffer.allocate(128); 087 088 /** 089 * CharBuffer used as output for the decoder. It should be 090 * somewhat larger as we write from this buffer to the 091 * underlying Writer. 092 */ 093 private final CharBuffer decoderOut; 094 095 /** 096 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} 097 * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} 098 * is called. 099 * 100 * @param writer the target {@link Writer} 101 * @param decoder the charset decoder 102 * @since 2.1 103 */ 104 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) { 105 this(writer, decoder, BUFFER_SIZE, false); 106 } 107 108 /** 109 * Constructs a new {@link WriterOutputStream}. 110 * 111 * @param writer the target {@link Writer} 112 * @param decoder the charset decoder 113 * @param bufferSize the size of the output buffer in number of characters 114 * @param writeImmediately If {@code true} the output buffer will be flushed after each 115 * write operation, i.e. all available data will be written to the 116 * underlying {@link Writer} immediately. If {@code false}, the 117 * output buffer will only be flushed when it overflows or when 118 * {@link #flush()} or {@link #close()} is called. 119 * @since 2.1 120 */ 121 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, 122 final boolean writeImmediately) { 123 checkIbmJdkWithBrokenUTF16( decoder.charset()); 124 this.writer = writer; 125 this.decoder = decoder; 126 this.writeImmediately = writeImmediately; 127 decoderOut = CharBuffer.allocate(bufferSize); 128 } 129 130 /** 131 * Constructs a new {@link WriterOutputStream}. 132 * 133 * @param writer the target {@link Writer} 134 * @param charset the charset encoding 135 * @param bufferSize the size of the output buffer in number of characters 136 * @param writeImmediately If {@code true} the output buffer will be flushed after each 137 * write operation, i.e. all available data will be written to the 138 * underlying {@link Writer} immediately. If {@code false}, the 139 * output buffer will only be flushed when it overflows or when 140 * {@link #flush()} or {@link #close()} is called. 141 */ 142 public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, 143 final boolean writeImmediately) { 144 this(writer, 145 charset.newDecoder() 146 .onMalformedInput(CodingErrorAction.REPLACE) 147 .onUnmappableCharacter(CodingErrorAction.REPLACE) 148 .replaceWith("?"), 149 bufferSize, 150 writeImmediately); 151 } 152 153 /** 154 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} 155 * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} 156 * is called. 157 * 158 * @param writer the target {@link Writer} 159 * @param charset the charset encoding 160 */ 161 public WriterOutputStream(final Writer writer, final Charset charset) { 162 this(writer, charset, BUFFER_SIZE, false); 163 } 164 165 /** 166 * Constructs a new {@link WriterOutputStream}. 167 * 168 * @param writer the target {@link Writer} 169 * @param charsetName the name of the charset encoding 170 * @param bufferSize the size of the output buffer in number of characters 171 * @param writeImmediately If {@code true} the output buffer will be flushed after each 172 * write operation, i.e. all available data will be written to the 173 * underlying {@link Writer} immediately. If {@code false}, the 174 * output buffer will only be flushed when it overflows or when 175 * {@link #flush()} or {@link #close()} is called. 176 */ 177 public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, 178 final boolean writeImmediately) { 179 this(writer, Charset.forName(charsetName), bufferSize, writeImmediately); 180 } 181 182 /** 183 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} 184 * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} 185 * is called. 186 * 187 * @param writer the target {@link Writer} 188 * @param charsetName the name of the charset encoding 189 */ 190 public WriterOutputStream(final Writer writer, final String charsetName) { 191 this(writer, charsetName, BUFFER_SIZE, false); 192 } 193 194 /** 195 * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output 196 * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when 197 * {@link #flush()} or {@link #close()} is called. 198 * 199 * @param writer the target {@link Writer} 200 * @deprecated 2.5 use {@link #WriterOutputStream(Writer, Charset)} instead 201 */ 202 @Deprecated 203 public WriterOutputStream(final Writer writer) { 204 this(writer, Charset.defaultCharset(), BUFFER_SIZE, false); 205 } 206 207 /** 208 * Write bytes from the specified byte array to the stream. 209 * 210 * @param b the byte array containing the bytes to write 211 * @param off the start offset in the byte array 212 * @param len the number of bytes to write 213 * @throws IOException if an I/O error occurs. 214 */ 215 @Override 216 public void write(final byte[] b, int off, int len) throws IOException { 217 while (len > 0) { 218 final int c = Math.min(len, decoderIn.remaining()); 219 decoderIn.put(b, off, c); 220 processInput(false); 221 len -= c; 222 off += c; 223 } 224 if (writeImmediately) { 225 flushOutput(); 226 } 227 } 228 229 /** 230 * Write bytes from the specified byte array to the stream. 231 * 232 * @param b the byte array containing the bytes to write 233 * @throws IOException if an I/O error occurs. 234 */ 235 @Override 236 public void write(final byte[] b) throws IOException { 237 write(b, 0, b.length); 238 } 239 240 /** 241 * Write a single byte to the stream. 242 * 243 * @param b the byte to write 244 * @throws IOException if an I/O error occurs. 245 */ 246 @Override 247 public void write(final int b) throws IOException { 248 write(new byte[] {(byte) b}, 0, 1); 249 } 250 251 /** 252 * Flush the stream. Any remaining content accumulated in the output buffer 253 * will be written to the underlying {@link Writer}. After that 254 * {@link Writer#flush()} will be called. 255 * @throws IOException if an I/O error occurs. 256 */ 257 @Override 258 public void flush() throws IOException { 259 flushOutput(); 260 writer.flush(); 261 } 262 263 /** 264 * Close the stream. Any remaining content accumulated in the output buffer 265 * will be written to the underlying {@link Writer}. After that 266 * {@link Writer#close()} will be called. 267 * @throws IOException if an I/O error occurs. 268 */ 269 @Override 270 public void close() throws IOException { 271 processInput(true); 272 flushOutput(); 273 writer.close(); 274 } 275 276 /** 277 * Decode the contents of the input ByteBuffer into a CharBuffer. 278 * 279 * @param endOfInput indicates end of input 280 * @throws IOException if an I/O error occurs. 281 */ 282 private void processInput(final boolean endOfInput) throws IOException { 283 // Prepare decoderIn for reading 284 decoderIn.flip(); 285 CoderResult coderResult; 286 while (true) { 287 coderResult = decoder.decode(decoderIn, decoderOut, endOfInput); 288 if (coderResult.isOverflow()) { 289 flushOutput(); 290 } else if (coderResult.isUnderflow()) { 291 break; 292 } else { 293 // The decoder is configured to replace malformed input and unmappable characters, 294 // so we should not get here. 295 throw new IOException("Unexpected coder result"); 296 } 297 } 298 // Discard the bytes that have been read 299 decoderIn.compact(); 300 } 301 302 /** 303 * Flush the output. 304 * 305 * @throws IOException if an I/O error occurs. 306 */ 307 private void flushOutput() throws IOException { 308 if (decoderOut.position() > 0) { 309 writer.write(decoderOut.array(), 0, decoderOut.position()); 310 decoderOut.rewind(); 311 } 312 } 313 314 /** 315 * Check if the JDK in use properly supports the given charset. 316 * 317 * @param charset the charset to check the support for 318 */ 319 private static void checkIbmJdkWithBrokenUTF16(final Charset charset){ 320 if (!"UTF-16".equals(charset.name())) { 321 return; 322 } 323 final String TEST_STRING_2 = "v\u00e9s"; 324 final byte[] bytes = TEST_STRING_2.getBytes(charset); 325 326 final CharsetDecoder charsetDecoder2 = charset.newDecoder(); 327 final ByteBuffer bb2 = ByteBuffer.allocate(16); 328 final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length()); 329 final int len = bytes.length; 330 for (int i = 0; i < len; i++) { 331 bb2.put(bytes[i]); 332 bb2.flip(); 333 try { 334 charsetDecoder2.decode(bb2, cb2, i == (len - 1)); 335 } catch ( final IllegalArgumentException e){ 336 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " + 337 "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 338 } 339 bb2.compact(); 340 } 341 cb2.rewind(); 342 if (!TEST_STRING_2.equals(cb2.toString())){ 343 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " + 344 "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 345 } 346 347 } 348}