001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.harmony.pack200; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026import java.util.stream.IntStream; 027 028/** 029 * Abstract superclass for a set of bands 030 */ 031public abstract class BandSet { 032 033 /** 034 * Results obtained by trying different Codecs to encode a band 035 */ 036 public class BandAnalysisResults { 037 038 // The number of Codecs tried so far 039 private int numCodecsTried = 0; 040 041 // The number of bytes saved by using betterCodec instead of the default codec 042 private int saved = 0; 043 044 // Extra metadata to pass to the segment header (to be appended to the 045 // band_headers band) 046 private int[] extraMetadata; 047 048 // The results of encoding the band with betterCodec 049 private byte[] encodedBand; 050 051 // The best Codec found so far, or should be null if the default is the 052 // best so far 053 private Codec betterCodec; 054 055 } 056 /** 057 * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate 058 * whether an alternative Codec could make the encoded band smaller. 059 */ 060 public class BandData { 061 062 private final int[] band; 063 private int smallest = Integer.MAX_VALUE; 064 private int largest = Integer.MIN_VALUE; 065 private int smallestDelta; 066 private int largestDelta; 067 068 private int deltaIsAscending = 0; 069 private int smallDeltaCount = 0; 070 071 private double averageAbsoluteDelta = 0; 072 private double averageAbsoluteValue = 0; 073 074 private Map<Integer, Integer> distinctValues; 075 076 /** 077 * Create a new instance of BandData. The band is then analysed. 078 * 079 * @param band - the band of integers 080 */ 081 public BandData(final int[] band) { 082 this.band = band; 083 final Integer one = Integer.valueOf(1); 084 for (int i = 0; i < band.length; i++) { 085 if (band[i] < smallest) { 086 smallest = band[i]; 087 } 088 if (band[i] > largest) { 089 largest = band[i]; 090 } 091 if (i != 0) { 092 final int delta = band[i] - band[i - 1]; 093 if (delta < smallestDelta) { 094 smallestDelta = delta; 095 } 096 if (delta > largestDelta) { 097 largestDelta = delta; 098 } 099 if (delta >= 0) { 100 deltaIsAscending++; 101 } 102 averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1); 103 if (Math.abs(delta) < 256) { 104 smallDeltaCount++; 105 } 106 } else { 107 smallestDelta = band[0]; 108 largestDelta = band[0]; 109 } 110 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length; 111 if (effort > 3) { // do calculations needed to consider population codec 112 if (distinctValues == null) { 113 distinctValues = new HashMap<>(); 114 } 115 final Integer value = Integer.valueOf(band[i]); 116 Integer count = distinctValues.get(value); 117 if (count == null) { 118 count = one; 119 } else { 120 count = Integer.valueOf(count.intValue() + 1); 121 } 122 distinctValues.put(value, count); 123 } 124 } 125 } 126 127 /** 128 * Returns true if any band elements are negative. 129 * 130 * @return true if any band elements are negative. 131 */ 132 public boolean anyNegatives() { 133 return smallest < 0; 134 } 135 136 /** 137 * Returns true if the band deltas are mainly positive (heuristic). 138 * 139 * @return true if the band deltas are mainly positive (heuristic). 140 */ 141 public boolean mainlyPositiveDeltas() { 142 // Note: the value below has been tuned - please test carefully if changing it 143 return (float) deltaIsAscending / (float) band.length > 0.95F; 144 } 145 146 /** 147 * Returns true if the deltas between adjacent band elements are mainly small (heuristic). 148 * 149 * @return true if the deltas between adjacent band elements are mainly small (heuristic). 150 */ 151 public boolean mainlySmallDeltas() { 152 // Note: the value below has been tuned - please test carefully if changing it 153 return (float) smallDeltaCount / (float) band.length > 0.7F; 154 } 155 156 /** 157 * Returns the total number of distinct values found in the band. 158 * 159 * @return the total number of distinct values found in the band. 160 */ 161 public int numDistinctValues() { 162 if (distinctValues == null) { 163 return band.length; 164 } 165 return distinctValues.size(); 166 } 167 168 /** 169 * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 170 * 171 * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 172 */ 173 public boolean wellCorrelated() { 174 // Note: the value below has been tuned - please test carefully if changing it 175 return averageAbsoluteDelta * 3.1 < averageAbsoluteValue; 176 } 177 178 } 179 180 // Minimum size of band for each effort level where we consider alternative codecs 181 // Note: these values have been tuned - please test carefully if changing them 182 private static final int[] effortThresholds = {0, 0, 1000, 500, 100, 100, 100, 100, 100, 0}; 183 184 protected final SegmentHeader segmentHeader; 185 final int effort; 186 187 private long[] canonicalLargest; 188 189 private long[] canonicalSmallest; 190 191 /** 192 * Create a new BandSet 193 * 194 * @param effort - the packing effort to be used (must be 1-9) 195 * @param header - the segment header 196 */ 197 public BandSet(final int effort, final SegmentHeader header) { 198 this.effort = effort; 199 this.segmentHeader = header; 200 } 201 202 private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) 203 throws Pack200Exception { 204 205 final BandAnalysisResults results = new BandAnalysisResults(); 206 207 if (canonicalLargest == null) { 208 canonicalLargest = new long[116]; 209 canonicalSmallest = new long[116]; 210 for (int i = 1; i < canonicalLargest.length; i++) { 211 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest(); 212 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest(); 213 } 214 } 215 final BandData bandData = new BandData(band); 216 217 // Check that there is a reasonable saving to be made 218 final byte[] encoded = defaultCodec.encode(band); 219 results.encodedBand = encoded; 220 221 // Note: these values have been tuned - please test carefully if changing them 222 if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak 223 return results; 224 } 225 226 // Check if we can use BYTE1 as that's a 1:1 mapping if we can 227 if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) { 228 results.encodedBand = Codec.BYTE1.encode(band); 229 results.betterCodec = Codec.BYTE1; 230 return results; 231 } 232 233 // Consider a population codec (but can't be nested) 234 if (effort > 3 && !name.equals("POPULATION")) { 235 final int numDistinctValues = bandData.numDistinctValues(); 236 final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length; 237 238 // Note: these values have been tuned - please test carefully if changing them 239 if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 240 || (effort > 6 && distinctValuesAsProportion < 0.04)) { // TODO: tweak 241 encodeWithPopulationCodec(name, band, defaultCodec, bandData, results); 242 if (timeToStop(results)) { 243 return results; 244 } 245 } 246 } 247 248 final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>(); 249 250 // See if the deltas are mainly small increments 251 if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) { 252 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2); 253 } 254 255 if (bandData.wellCorrelated()) { // Try delta encodings 256 if (bandData.mainlyPositiveDeltas()) { 257 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 258 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 259 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 260 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 261 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 262 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 263 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 264 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 265 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 266 } else { 267 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 268 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 269 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 270 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 271 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 272 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 273 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 274 } 275 } else if (bandData.anyNegatives()) { 276 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 277 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 278 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 279 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 280 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 281 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 282 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 283 } else { 284 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 285 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 286 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 287 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 288 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 289 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 290 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 291 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 292 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 293 } 294 if (name.equalsIgnoreCase("cpint")) { 295 System.out.print(""); 296 } 297 298 for (final BHSDCodec[] family : codecFamiliesToTry) { 299 tryCodecs(name, band, defaultCodec, bandData, results, encoded, family); 300 if (timeToStop(results)) { 301 break; 302 } 303 } 304 305 return results; 306 } 307 308 /** 309 * Converts a list of ConstantPoolEntrys to an int[] array of their indices 310 * 311 * @param list conversion source. 312 * @return conversion result. 313 */ 314 protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) { 315 final int[] array = new int[list.size()]; 316 for (int i = 0; i < array.length; i++) { 317 array[i] = list.get(i).getIndex(); 318 if (array[i] < 0) { 319 throw new IllegalArgumentException("Index should be > 0"); 320 } 321 } 322 return array; 323 } 324 325 /** 326 * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls) 327 * 328 * @param list conversion source. 329 * @return conversion result. 330 */ 331 protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) { 332 final int[] array = new int[list.size()]; 333 for (int j = 0; j < array.length; j++) { 334 final ConstantPoolEntry cpEntry = list.get(j); 335 array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1; 336 if (cpEntry != null && cpEntry.getIndex() < 0) { 337 throw new IllegalArgumentException("Index should be > 0"); 338 } 339 } 340 return array; 341 } 342 343 /** 344 * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater 345 * than 1. 346 * 347 * @param name - name of the band (used for debugging) 348 * @param ints - the band 349 * @param defaultCodec - the default Codec 350 * @return the encoded band 351 * @throws Pack200Exception TODO 352 */ 353 public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) 354 throws Pack200Exception { 355 byte[] encodedBand = null; 356 // Useful for debugging 357// if (ints.length > 0) { 358// System.out.println("encoding " + name + " " + ints.length); 359// } 360 if (effort > 1 && (ints.length >= effortThresholds[effort])) { 361 final BandAnalysisResults results = analyseBand(name, ints, defaultCodec); 362 final Codec betterCodec = results.betterCodec; 363 encodedBand = results.encodedBand; 364 if (betterCodec != null) { 365 if (betterCodec instanceof BHSDCodec) { 366 final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec); 367 int specifier = specifierBand[0]; 368 if (specifierBand.length > 1) { 369 for (int i = 1; i < specifierBand.length; i++) { 370 segmentHeader.appendBandCodingSpecifier(specifierBand[i]); 371 } 372 } 373 if (defaultCodec.isSigned()) { 374 specifier = -1 - specifier; 375 } else { 376 specifier = specifier + defaultCodec.getL(); 377 } 378 final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 379 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 380 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 381 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 382 return band; 383 } 384 if (betterCodec instanceof PopulationCodec) { 385 IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier); 386 return encodedBand; 387 } 388 if (betterCodec instanceof RunCodec) { 389 390 } 391 } 392 } 393 394 // If we get here then we've decided to use the default codec. 395 if (ints.length > 0) { 396 if (encodedBand == null) { 397 encodedBand = defaultCodec.encode(ints); 398 } 399 final int first = ints[0]; 400 if (defaultCodec.getB() != 1) { 401 if (defaultCodec.isSigned() && first >= -256 && first <= -1) { 402 final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec); 403 final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 404 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 405 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 406 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 407 return band; 408 } 409 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) { 410 final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL(); 411 final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 412 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 413 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 414 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 415 return band; 416 } 417 } 418 return encodedBand; 419 } 420 return new byte[0]; 421 } 422 423 /** 424 * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands 425 * 426 * @param name - name of the band (for debugging purposes) 427 * @param flags - the band 428 * @param loCodec - Codec for the low 32-bits band 429 * @param hiCodec - Codec for the high 32-bits band 430 * @param haveHiFlags - ignores the high band if true as all values would be zero 431 * @return the encoded band 432 * @throws Pack200Exception TODO 433 */ 434 protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, 435 final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception { 436 if (!haveHiFlags) { 437 final int[] loBits = new int[flags.length]; 438 Arrays.setAll(loBits, i -> (int) flags[i]); 439 return encodeBandInt(name, loBits, loCodec); 440 } 441 final int[] hiBits = new int[flags.length]; 442 final int[] loBits = new int[flags.length]; 443 for (int i = 0; i < flags.length; i++) { 444 final long l = flags[i]; 445 hiBits[i] = (int) (l >> 32); 446 loBits[i] = (int) l; 447 } 448 final byte[] hi = encodeBandInt(name, hiBits, hiCodec); 449 final byte[] lo = encodeBandInt(name, loBits, loCodec); 450 final byte[] total = new byte[hi.length + lo.length]; 451 System.arraycopy(hi, 0, total, 0, hi.length); 452 System.arraycopy(lo, 0, total, hi.length + 1, lo.length); 453 return total; 454 } 455 456// This could be useful if further enhancements are done but is not currently used 457// 458// private void encodeWithRunCodec(String name, int[] band, int index, 459// BHSDCodec defaultCodec, BandData bandData, 460// BandAnalysisResults results) throws Pack200Exception { 461// int[] firstBand = new int[index]; 462// int[] secondBand = new int[band.length - index]; 463// System.arraycopy(band, 0, firstBand, 0, index); 464// System.arraycopy(band, index, secondBand, 0, secondBand.length); 465// BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec); 466// BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec); 467// int specifier = 117; 468// byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 469// int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual 470// if (totalLength < results.encodedBand.length) { 471// System.out.println("using run codec"); 472// results.saved += results.encodedBand.length - totalLength; 473// byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length]; 474// System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length); 475// System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length); 476// System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, secondResults.encodedBand.length); 477// results.encodedBand = encodedBand; 478// results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec); 479// } 480// } 481 482 protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, 483 final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception { 484 return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags); 485 } 486 487 /** 488 * Encode a single value with the given Codec 489 * 490 * @param value - the value to encode 491 * @param codec - Codec to use 492 * @return the encoded value 493 * @throws Pack200Exception TODO 494 */ 495 public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception { 496 return codec.encode(value); 497 } 498 499 /** 500 * Encode a band without considering other Codecs 501 * 502 * @param band - the band 503 * @param codec - the Codec to use 504 * @return the encoded band 505 * @throws Pack200Exception TODO 506 */ 507 public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception { 508 return codec.encode(band); 509 } 510 511 private void encodeWithPopulationCodec(final String name, final int[] band, final BHSDCodec defaultCodec, 512 final BandData bandData, final BandAnalysisResults results) throws Pack200Exception { 513 results.numCodecsTried += 3; // quite a bit more effort to try this codec 514 final Map<Integer, Integer> distinctValues = bandData.distinctValues; 515 516 final List<Integer> favored = new ArrayList<>(); 517 distinctValues.forEach((k, v) -> { 518 if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak 519 favored.add(k); 520 } 521 }); 522 523 // Sort the favored list with the most commonly occurring first 524 if (distinctValues.size() > 255) { 525 favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0))); 526 } 527 528 final Map<Integer, Integer> favoredToIndex = new HashMap<>(); 529 for (int i = 0; i < favored.size(); i++) { 530 favoredToIndex.put(favored.get(i), Integer.valueOf(i)); 531 } 532 533 final IntList unfavoured = new IntList(); 534 final int[] tokens = new int[band.length]; 535 for (int i = 0; i < band.length; i++) { 536 final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i])); 537 if (favouredIndex == null) { 538 tokens[i] = 0; 539 unfavoured.add(band[i]); 540 } else { 541 tokens[i] = favouredIndex.intValue() + 1; 542 } 543 } 544 favored.add(favored.get(favored.size() - 1)); // repeat last value 545 final int[] favouredBand = integerListToArray(favored); 546 final int[] unfavouredBand = unfavoured.toArray(); 547 548 // Analyse the three bands to get the best codec 549 final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec); 550 final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec); 551 552 int tdefL = 0; 553 int l = 0; 554 Codec tokenCodec = null; 555 byte[] tokensEncoded; 556 final int k = favored.size() - 1; 557 if (k < 256) { 558 tdefL = 1; 559 tokensEncoded = Codec.BYTE1.encode(tokens); 560 } else { 561 final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec); 562 tokenCodec = tokenResults.betterCodec; 563 tokensEncoded = tokenResults.encodedBand; 564 if (tokenCodec == null) { 565 tokenCodec = defaultCodec; 566 } 567 l = ((BHSDCodec) tokenCodec).getL(); 568 final int h = ((BHSDCodec) tokenCodec).getH(); 569 final int s = ((BHSDCodec) tokenCodec).getS(); 570 final int b = ((BHSDCodec) tokenCodec).getB(); 571 final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0; 572 if (s == 0 && d == 0) { 573 boolean canUseTDefL = true; 574 if (b > 1) { 575 final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h); 576 if (oneLowerB.largest() >= k) { 577 canUseTDefL = false; 578 } 579 } 580 if (canUseTDefL) { 581 switch (l) { 582 case 4: 583 tdefL = 1; 584 break; 585 case 8: 586 tdefL = 2; 587 break; 588 case 16: 589 tdefL = 3; 590 break; 591 case 32: 592 tdefL = 4; 593 break; 594 case 64: 595 tdefL = 5; 596 break; 597 case 128: 598 tdefL = 6; 599 break; 600 case 192: 601 tdefL = 7; 602 break; 603 case 224: 604 tdefL = 8; 605 break; 606 case 240: 607 tdefL = 9; 608 break; 609 case 248: 610 tdefL = 10; 611 break; 612 case 252: 613 tdefL = 11; 614 break; 615 } 616 } 617 } 618 } 619 620 final byte[] favouredEncoded = favouredResults.encodedBand; 621 final byte[] unfavouredEncoded = unfavouredResults.encodedBand; 622 final Codec favouredCodec = favouredResults.betterCodec; 623 final Codec unfavouredCodec = unfavouredResults.betterCodec; 624 625 int specifier = 141 + (favouredCodec == null ? 1 : 0) + (4 * tdefL) + (unfavouredCodec == null ? 2 : 0); 626 final IntList extraBandMetadata = new IntList(3); 627 if (favouredCodec != null) { 628 IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add); 629 } 630 if (tdefL == 0) { 631 IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add); 632 } 633 if (unfavouredCodec != null) { 634 IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add); 635 } 636 final int[] extraMetadata = extraBandMetadata.toArray(); 637 final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata); 638 if (defaultCodec.isSigned()) { 639 specifier = -1 - specifier; 640 } else { 641 specifier = specifier + defaultCodec.getL(); 642 } 643 final byte[] firstValueEncoded = defaultCodec.encode(new int[] {specifier}); 644 final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length 645 + unfavouredEncoded.length; 646 647 if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) { 648 results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length); 649 final byte[] encodedBand = new byte[totalBandLength]; 650 System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length); 651 System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length); 652 System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, 653 tokensEncoded.length); 654 System.arraycopy(unfavouredEncoded, 0, encodedBand, 655 firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, unfavouredEncoded.length); 656 results.encodedBand = encodedBand; 657 results.extraMetadata = extraMetadata; 658 if (l != 0) { 659 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec); 660 } else { 661 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec); 662 } 663 } 664 } 665 666 /* 667 * Flatten a 2-dimension array into a 1-dimension array 668 */ 669 private long[] flatten(final long[][] flags) { 670 int totalSize = 0; 671 for (final long[] flag : flags) { 672 totalSize += flag.length; 673 } 674 final long[] flatArray = new long[totalSize]; 675 int index = 0; 676 for (final long[] flag : flags) { 677 for (final long element : flag) { 678 flatArray[index] = element; 679 index++; 680 } 681 } 682 return flatArray; 683 } 684 685 /** 686 * Converts a list of Integers to an int[] array. 687 * 688 * @param integerList conversion source. 689 * @return conversion result. 690 */ 691 protected int[] integerListToArray(final List<Integer> integerList) { 692 return integerList.stream().mapToInt(Integer::intValue).toArray(); 693 } 694 695 /** 696 * Converts a list of Longs to an long[] array. 697 * 698 * @param longList conversion source. 699 * @return conversion result. 700 */ 701 protected long[] longListToArray(final List<Long> longList) { 702 return longList.stream().mapToLong(Long::longValue).toArray(); 703 } 704 705 /** 706 * Write the packed set of bands to the given output stream 707 * 708 * @param out TODO 709 * @throws IOException If an I/O error occurs. 710 * @throws Pack200Exception TODO 711 */ 712 public abstract void pack(OutputStream out) throws IOException, Pack200Exception; 713 714 private boolean timeToStop(final BandAnalysisResults results) { 715 // if tried more than effort number of codecs for this band then return 716 // Note: these values have been tuned - please test carefully if changing them 717 if (effort > 6) { 718 return results.numCodecsTried >= effort * 2; 719 } 720 return results.numCodecsTried >= effort; 721 // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 % 722 // || (float)results.saved/(float)results.encodedBand.length > (float)effort * 2/100; 723 } 724 725 private void tryCodecs(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData, 726 final BandAnalysisResults results, final byte[] encoded, final BHSDCodec[] potentialCodecs) 727 throws Pack200Exception { 728 for (final BHSDCodec potential : potentialCodecs) { 729 if (potential.equals(defaultCodec)) { 730 return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there 731 // won't be any savings 732 } 733 if (potential.isDelta()) { 734 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta 735 && potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) { 736 // TODO: can represent some negative deltas with overflow 737 final byte[] encoded2 = potential.encode(band); 738 results.numCodecsTried++; 739 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 740 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 741 if (saved > results.saved) { 742 results.betterCodec = potential; 743 results.encodedBand = encoded2; 744 results.saved = saved; 745 } 746 } 747 } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) { 748 final byte[] encoded2 = potential.encode(band); 749 results.numCodecsTried++; 750 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 751 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 752 if (saved > results.saved) { 753 results.betterCodec = potential; 754 results.encodedBand = encoded2; 755 results.saved = saved; 756 } 757 } 758 if (timeToStop(results)) { 759 return; 760 } 761 } 762 } 763 764}