001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.Collections; 028import java.util.Locale; 029import java.util.ServiceLoader; 030import java.util.Set; 031import java.util.SortedMap; 032import java.util.TreeMap; 033 034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 042import org.apache.commons.compress.archivers.sevenz.SevenZFile; 043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 047import org.apache.commons.compress.utils.IOUtils; 048import org.apache.commons.compress.utils.Sets; 049 050/** 051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 052 * the InputStream. In order to add other implementations, you should extend 053 * ArchiveStreamFactory and override the appropriate methods (and call their 054 * implementation from super of course). 055 * 056 * Compressing a ZIP-File: 057 * 058 * <pre> 059 * final OutputStream out = Files.newOutputStream(output.toPath()); 060 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 061 * 062 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 063 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 064 * os.closeArchiveEntry(); 065 * 066 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 067 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 068 * os.closeArchiveEntry(); 069 * os.close(); 070 * </pre> 071 * 072 * Decompressing a ZIP-File: 073 * 074 * <pre> 075 * final InputStream is = Files.newInputStream(input.toPath()); 076 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 077 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 078 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 079 * IOUtils.copy(in, out); 080 * out.close(); 081 * in.close(); 082 * </pre> 083 * @Immutable provided that the deprecated method setEntryEncoding is not used. 084 * @ThreadSafe even if the deprecated method setEntryEncoding is used 085 */ 086public class ArchiveStreamFactory implements ArchiveStreamProvider { 087 088 private static final int TAR_HEADER_SIZE = 512; 089 090 private static final int DUMP_SIGNATURE_SIZE = 32; 091 092 private static final int SIGNATURE_SIZE = 12; 093 094 /** 095 * The singleton instance using the platform default encoding. 096 * @since 1.21 097 */ 098 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 099 100 /** 101 * Constant (value {@value}) used to identify the APK archive format. 102 * <p> 103 * APK file extensions are .apk, .xapk, .apks, .apkm 104 * </p> 105 * 106 * @since 1.22 107 */ 108 public static final String APK = "apk"; 109 110 /** 111 * Constant (value {@value}) used to identify the XAPK archive format. 112 * <p> 113 * APK file extensions are .apk, .xapk, .apks, .apkm 114 * </p> 115 * 116 * @since 1.22 117 */ 118 public static final String XAPK = "xapk"; 119 120 /** 121 * Constant (value {@value}) used to identify the APKS archive format. 122 * <p> 123 * APK file extensions are .apk, .xapk, .apks, .apkm 124 * </p> 125 * 126 * @since 1.22 127 */ 128 public static final String APKS = "apks"; 129 130 /** 131 * Constant (value {@value}) used to identify the APKM archive format. 132 * <p> 133 * APK file extensions are .apk, .xapk, .apks, .apkm 134 * </p> 135 * 136 * @since 1.22 137 */ 138 public static final String APKM = "apkm"; 139 140 /** 141 * Constant (value {@value}) used to identify the AR archive format. 142 * @since 1.1 143 */ 144 public static final String AR = "ar"; 145 146 /** 147 * Constant (value {@value}) used to identify the ARJ archive format. 148 * Not supported as an output stream type. 149 * @since 1.6 150 */ 151 public static final String ARJ = "arj"; 152 153 /** 154 * Constant (value {@value}) used to identify the CPIO archive format. 155 * @since 1.1 156 */ 157 public static final String CPIO = "cpio"; 158 159 /** 160 * Constant (value {@value}) used to identify the Unix DUMP archive format. 161 * Not supported as an output stream type. 162 * @since 1.3 163 */ 164 public static final String DUMP = "dump"; 165 166 /** 167 * Constant (value {@value}) used to identify the JAR archive format. 168 * @since 1.1 169 */ 170 public static final String JAR = "jar"; 171 172 /** 173 * Constant used to identify the TAR archive format. 174 * @since 1.1 175 */ 176 public static final String TAR = "tar"; 177 178 /** 179 * Constant (value {@value}) used to identify the ZIP archive format. 180 * @since 1.1 181 */ 182 public static final String ZIP = "zip"; 183 184 /** 185 * Constant (value {@value}) used to identify the 7z archive format. 186 * @since 1.8 187 */ 188 public static final String SEVEN_Z = "7z"; 189 190 private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() { 191 return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader()); 192 } 193 194 /** 195 * Try to determine the type of Archiver 196 * @param in input stream 197 * @return type of archiver if found 198 * @throws ArchiveException if an archiver cannot be detected in the stream 199 * @since 1.14 200 */ 201 public static String detect(final InputStream in) throws ArchiveException { 202 if (in == null) { 203 throw new IllegalArgumentException("Stream must not be null."); 204 } 205 206 if (!in.markSupported()) { 207 throw new IllegalArgumentException("Mark is not supported."); 208 } 209 210 final byte[] signature = new byte[SIGNATURE_SIZE]; 211 in.mark(signature.length); 212 int signatureLength = -1; 213 try { 214 signatureLength = IOUtils.readFully(in, signature); 215 in.reset(); 216 } catch (final IOException e) { 217 throw new ArchiveException("IOException while reading signature.", e); 218 } 219 220 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 221 return ZIP; 222 } 223 if (JarArchiveInputStream.matches(signature, signatureLength)) { 224 return JAR; 225 } 226 if (ArArchiveInputStream.matches(signature, signatureLength)) { 227 return AR; 228 } 229 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 230 return CPIO; 231 } 232 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 233 return ARJ; 234 } 235 if (SevenZFile.matches(signature, signatureLength)) { 236 return SEVEN_Z; 237 } 238 239 // Dump needs a bigger buffer to check the signature; 240 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 241 in.mark(dumpsig.length); 242 try { 243 signatureLength = IOUtils.readFully(in, dumpsig); 244 in.reset(); 245 } catch (final IOException e) { 246 throw new ArchiveException("IOException while reading dump signature", e); 247 } 248 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 249 return DUMP; 250 } 251 252 // Tar needs an even bigger buffer to check the signature; read the first block 253 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 254 in.mark(tarHeader.length); 255 try { 256 signatureLength = IOUtils.readFully(in, tarHeader); 257 in.reset(); 258 } catch (final IOException e) { 259 throw new ArchiveException("IOException while reading tar signature", e); 260 } 261 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 262 return TAR; 263 } 264 265 // COMPRESS-117 - improve auto-recognition 266 if (signatureLength >= TAR_HEADER_SIZE) { 267 try (TarArchiveInputStream tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) { 268 // COMPRESS-191 - verify the header checksum 269 if (tais.getNextTarEntry().isCheckSumOK()) { 270 return TAR; 271 } 272 } catch (final Exception e) { // NOPMD NOSONAR 273 // can generate IllegalArgumentException as well 274 // as IOException 275 // autodetection, simply not a TAR 276 // ignored 277 } 278 } 279 throw new ArchiveException("No Archiver found for the stream signature"); 280 } 281 282 /** 283 * Constructs a new sorted map from input stream provider names to provider 284 * objects. 285 * 286 * <p> 287 * The map returned by this method will have one entry for each provider for 288 * which support is available in the current Java virtual machine. If two or 289 * more supported provider have the same name then the resulting map will 290 * contain just one of them; which one it will contain is not specified. 291 * </p> 292 * 293 * <p> 294 * The invocation of this method, and the subsequent use of the resulting 295 * map, may cause time-consuming disk or network I/O operations to occur. 296 * This method is provided for applications that need to enumerate all of 297 * the available providers, for example to allow user provider selection. 298 * </p> 299 * 300 * <p> 301 * This method may return different results at different times if new 302 * providers are dynamically made available to the current Java virtual 303 * machine. 304 * </p> 305 * 306 * @return An immutable, map from names to provider objects 307 * @since 1.13 308 */ 309 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 310 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 311 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 312 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 313 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map)); 314 return map; 315 }); 316 } 317 318 /** 319 * Constructs a new sorted map from output stream provider names to provider 320 * objects. 321 * 322 * <p> 323 * The map returned by this method will have one entry for each provider for 324 * which support is available in the current Java virtual machine. If two or 325 * more supported provider have the same name then the resulting map will 326 * contain just one of them; which one it will contain is not specified. 327 * </p> 328 * 329 * <p> 330 * The invocation of this method, and the subsequent use of the resulting 331 * map, may cause time-consuming disk or network I/O operations to occur. 332 * This method is provided for applications that need to enumerate all of 333 * the available providers, for example to allow user provider selection. 334 * </p> 335 * 336 * <p> 337 * This method may return different results at different times if new 338 * providers are dynamically made available to the current Java virtual 339 * machine. 340 * </p> 341 * 342 * @return An immutable, map from names to provider objects 343 * @since 1.13 344 */ 345 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 346 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 347 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 348 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 349 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map)); 350 return map; 351 }); 352 } 353 354 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) { 355 names.forEach(name -> map.put(toKey(name), provider)); 356 } 357 358 private static String toKey(final String name) { 359 return name.toUpperCase(Locale.ROOT); 360 } 361 362 /** 363 * Entry encoding, null for the platform default. 364 */ 365 private final String encoding; 366 367 /** 368 * Entry encoding, null for the default. 369 */ 370 private volatile String entryEncoding; 371 372 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 373 374 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 375 376 /** 377 * Create an instance using the platform default encoding. 378 */ 379 public ArchiveStreamFactory() { 380 this(null); 381 } 382 383 /** 384 * Create an instance using the specified encoding. 385 * 386 * @param encoding the encoding to be used. 387 * 388 * @since 1.10 389 */ 390 public ArchiveStreamFactory(final String encoding) { 391 this.encoding = encoding; 392 // Also set the original field so can continue to use it. 393 this.entryEncoding = encoding; 394 } 395 396 /** 397 * Create an archive input stream from an input stream, autodetecting 398 * the archive type from the first few bytes of the stream. The InputStream 399 * must support marks, like BufferedInputStream. 400 * 401 * @param in the input stream 402 * @return the archive input stream 403 * @throws ArchiveException if the archiver name is not known 404 * @throws StreamingNotSupportedException if the format cannot be 405 * read from a stream 406 * @throws IllegalArgumentException if the stream is null or does not support mark 407 */ 408 public ArchiveInputStream createArchiveInputStream(final InputStream in) 409 throws ArchiveException { 410 return createArchiveInputStream(detect(in), in); 411 } 412 413 /** 414 * Creates an archive input stream from an archiver name and an input stream. 415 * 416 * @param archiverName the archive name, 417 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 418 * @param in the input stream 419 * @return the archive input stream 420 * @throws ArchiveException if the archiver name is not known 421 * @throws StreamingNotSupportedException if the format cannot be 422 * read from a stream 423 * @throws IllegalArgumentException if the archiver name or stream is null 424 */ 425 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) throws ArchiveException { 426 return createArchiveInputStream(archiverName, in, entryEncoding); 427 } 428 429 @Override 430 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, 431 final String actualEncoding) throws ArchiveException { 432 433 if (archiverName == null) { 434 throw new IllegalArgumentException("Archivername must not be null."); 435 } 436 437 if (in == null) { 438 throw new IllegalArgumentException("InputStream must not be null."); 439 } 440 441 if (AR.equalsIgnoreCase(archiverName)) { 442 return new ArArchiveInputStream(in); 443 } 444 if (ARJ.equalsIgnoreCase(archiverName)) { 445 if (actualEncoding != null) { 446 return new ArjArchiveInputStream(in, actualEncoding); 447 } 448 return new ArjArchiveInputStream(in); 449 } 450 if (ZIP.equalsIgnoreCase(archiverName)) { 451 if (actualEncoding != null) { 452 return new ZipArchiveInputStream(in, actualEncoding); 453 } 454 return new ZipArchiveInputStream(in); 455 } 456 if (TAR.equalsIgnoreCase(archiverName)) { 457 if (actualEncoding != null) { 458 return new TarArchiveInputStream(in, actualEncoding); 459 } 460 return new TarArchiveInputStream(in); 461 } 462 if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) { 463 if (actualEncoding != null) { 464 return new JarArchiveInputStream(in, actualEncoding); 465 } 466 return new JarArchiveInputStream(in); 467 } 468 if (CPIO.equalsIgnoreCase(archiverName)) { 469 if (actualEncoding != null) { 470 return new CpioArchiveInputStream(in, actualEncoding); 471 } 472 return new CpioArchiveInputStream(in); 473 } 474 if (DUMP.equalsIgnoreCase(archiverName)) { 475 if (actualEncoding != null) { 476 return new DumpArchiveInputStream(in, actualEncoding); 477 } 478 return new DumpArchiveInputStream(in); 479 } 480 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 481 throw new StreamingNotSupportedException(SEVEN_Z); 482 } 483 484 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 485 if (archiveStreamProvider != null) { 486 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 487 } 488 489 throw new ArchiveException("Archiver: " + archiverName + " not found."); 490 } 491 492 /** 493 * Creates an archive output stream from an archiver name and an output stream. 494 * 495 * @param archiverName the archive name, 496 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 497 * @param out the output stream 498 * @return the archive output stream 499 * @throws ArchiveException if the archiver name is not known 500 * @throws StreamingNotSupportedException if the format cannot be 501 * written to a stream 502 * @throws IllegalArgumentException if the archiver name or stream is null 503 */ 504 public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) 505 throws ArchiveException { 506 return createArchiveOutputStream(archiverName, out, entryEncoding); 507 } 508 509 @Override 510 public ArchiveOutputStream createArchiveOutputStream( 511 final String archiverName, final OutputStream out, final String actualEncoding) 512 throws ArchiveException { 513 if (archiverName == null) { 514 throw new IllegalArgumentException("Archivername must not be null."); 515 } 516 if (out == null) { 517 throw new IllegalArgumentException("OutputStream must not be null."); 518 } 519 520 if (AR.equalsIgnoreCase(archiverName)) { 521 return new ArArchiveOutputStream(out); 522 } 523 if (ZIP.equalsIgnoreCase(archiverName)) { 524 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 525 if (actualEncoding != null) { 526 zip.setEncoding(actualEncoding); 527 } 528 return zip; 529 } 530 if (TAR.equalsIgnoreCase(archiverName)) { 531 if (actualEncoding != null) { 532 return new TarArchiveOutputStream(out, actualEncoding); 533 } 534 return new TarArchiveOutputStream(out); 535 } 536 if (JAR.equalsIgnoreCase(archiverName)) { 537 if (actualEncoding != null) { 538 return new JarArchiveOutputStream(out, actualEncoding); 539 } 540 return new JarArchiveOutputStream(out); 541 } 542 if (CPIO.equalsIgnoreCase(archiverName)) { 543 if (actualEncoding != null) { 544 return new CpioArchiveOutputStream(out, actualEncoding); 545 } 546 return new CpioArchiveOutputStream(out); 547 } 548 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 549 throw new StreamingNotSupportedException(SEVEN_Z); 550 } 551 552 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 553 if (archiveStreamProvider != null) { 554 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 555 } 556 557 throw new ArchiveException("Archiver: " + archiverName + " not found."); 558 } 559 560 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 561 if (archiveInputStreamProviders == null) { 562 archiveInputStreamProviders = Collections 563 .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 564 } 565 return archiveInputStreamProviders; 566 } 567 568 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 569 if (archiveOutputStreamProviders == null) { 570 archiveOutputStreamProviders = Collections 571 .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 572 } 573 return archiveOutputStreamProviders; 574 } 575 576 /** 577 * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar 578 * files, or null for the archiver default. 579 * 580 * @return entry encoding, or null for the archiver default 581 * @since 1.5 582 */ 583 public String getEntryEncoding() { 584 return entryEncoding; 585 } 586 587 @Override 588 public Set<String> getInputStreamArchiveNames() { 589 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 590 } 591 592 @Override 593 public Set<String> getOutputStreamArchiveNames() { 594 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 595 } 596 597 /** 598 * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default. 599 * 600 * @param entryEncoding the entry encoding, null uses the archiver default. 601 * @since 1.5 602 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 603 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 604 * was used to specify the factory encoding. 605 */ 606 @Deprecated 607 public void setEntryEncoding(final String entryEncoding) { 608 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 609 if (encoding != null) { 610 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 611 } 612 this.entryEncoding = entryEncoding; 613 } 614 615}