001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.EOFException;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.ArrayList;
023import java.util.List;
024
025/**
026 * A BHSD codec is a means of encoding integer values as a sequence of bytes or vice versa using a specified "BHSD"
027 * encoding mechanism. It uses a variable-length encoding and a modified sign representation such that small numbers are
028 * represented as a single byte, whilst larger numbers take more bytes to encode. The number may be signed or unsigned;
029 * if it is unsigned, it can be weighted towards positive numbers or equally distributed using a one's complement. The
030 * Codec also supports delta coding, where a sequence of numbers is represented as a series of first-order differences.
031 * So a delta encoding of the integers [1..10] would be represented as a sequence of 10x1s. This allows the absolute
032 * value of a coded integer to fall outside of the 'small number' range, whilst still being encoded as a single byte.
033 *
034 * A BHSD codec is configured with four parameters:
035 * <dl>
036 * <dt>B</dt>
037 * <dd>The maximum number of bytes that each value is encoded as. B must be a value between [1..5]. For a pass-through
038 * coding (where each byte is encoded as itself, aka {@link #BYTE1}, B is 1 (each byte takes a maximum of 1 byte).</dd>
039 * <dt>H</dt>
040 * <dd>The radix of the integer. Values are defined as a sequence of values, where value <code>n</code> is multiplied by
041 * <code>H^<sup>n</sup></code>. So the number 1234 may be represented as the sequence 4 3 2 1 with a radix (H) of 10.
042 * Note that other permutations are also possible; 43 2 1 will also encode 1234. The co-parameter L is defined as 256-H.
043 * This is important because only the last value in a sequence may be &lt; L; all prior values must be &gt; L.</dd>
044 * <dt>S</dt>
045 * <dd>Whether the codec represents signed values (or not). This may have 3 values; 0 (unsigned), 1 (signed, one's
046 * complement) or 2 (signed, two's complement)</dd>
047 * <dt>D</dt>
048 * <dd>Whether the codec represents a delta encoding. This may be 0 (no delta) or 1 (delta encoding). A delta encoding
049 * of 1 indicates that values are cumulative; a sequence of <code>1 1 1 1 1</code> will represent the sequence
050 * <code>1 2 3 4 5</code>. For this reason, the codec supports two variants of decode; one
051 * {@link #decode(InputStream, long) with} and one {@link #decode(InputStream) without} a <code>last</code> parameter.
052 * If the codec is a non-delta encoding, then the value is ignored if passed. If the codec is a delta encoding, it is a
053 * run-time error to call the value without the extra parameter, and the previous value should be returned. (It was
054 * designed this way to support multi-threaded access without requiring a new instance of the Codec to be cloned for
055 * each use.)
056 * <dt>
057 * </dl>
058 *
059 * Codecs are notated as (B,H,S,D) and either D or S,D may be omitted if zero. Thus {@link #BYTE1} is denoted
060 * (1,256,0,0) or (1,256). The {@link #toString()} method prints out the condensed form of the encoding. Often, the last
061 * character in the name ({@link #BYTE1}, {@link #UNSIGNED5}) gives a clue as to the B value. Those that start with U
062 * ({@link #UDELTA5}, {@link #UNSIGNED5}) are unsigned; otherwise, in most cases, they are signed. The presence of the
063 * word Delta ({@link #DELTA5}, {@link #UDELTA5}) indicates a delta encoding is used.
064 *
065 */
066public final class BHSDCodec extends Codec {
067
068    /**
069     * The maximum number of bytes in each coding word
070     */
071    private final int b;
072
073    /**
074     * Whether delta encoding is used (0=false,1=true)
075     */
076    private final int d;
077
078    /**
079     * The radix of the encoding
080     */
081    private final int h;
082
083    /**
084     * The co-parameter of h; 256-h
085     */
086    private final int l;
087
088    /**
089     * Represents signed numbers or not (0=unsigned,1/2=signed)
090     */
091    private final int s;
092
093    private long cardinality;
094
095    private final long smallest;
096
097    private final long largest;
098
099    /**
100     * radix^i powers
101     */
102    private final long[] powers;
103
104    /**
105     * Constructs an unsigned, non-delta Codec with the given B and H values.
106     *
107     * @param b the maximum number of bytes that a value can be encoded as [1..5]
108     * @param h the radix of the encoding [1..256]
109     */
110    public BHSDCodec(final int b, final int h) {
111        this(b, h, 0, 0);
112    }
113
114    /**
115     * Constructs a non-delta Codec with the given B, H and S values.
116     *
117     * @param b the maximum number of bytes that a value can be encoded as [1..5]
118     * @param h the radix of the encoding [1..256]
119     * @param s whether the encoding represents signed numbers (s=0 is unsigned; s=1 is signed with 1s complement; s=2
120     *        is signed with ?)
121     */
122    public BHSDCodec(final int b, final int h, final int s) {
123        this(b, h, s, 0);
124    }
125
126    /**
127     * Constructs a Codec with the given B, H, S and D values.
128     *
129     * @param b the maximum number of bytes that a value can be encoded as [1..5]
130     * @param h the radix of the encoding [1..256]
131     * @param s whether the encoding represents signed numbers (s=0 is unsigned; s=1 is signed with 1s complement; s=2
132     *        is signed with ?)
133     * @param d whether this is a delta encoding (d=0 is non-delta; d=1 is delta)
134     */
135    public BHSDCodec(final int b, final int h, final int s, final int d) {
136        if (b < 1 || b > 5) {
137            throw new IllegalArgumentException("1<=b<=5");
138        }
139        if (h < 1 || h > 256) {
140            throw new IllegalArgumentException("1<=h<=256");
141        }
142        if (s < 0 || s > 2) {
143            throw new IllegalArgumentException("0<=s<=2");
144        }
145        if (d < 0 || d > 1) {
146            throw new IllegalArgumentException("0<=d<=1");
147        }
148        if (b == 1 && h != 256) {
149            throw new IllegalArgumentException("b=1 -> h=256");
150        }
151        if (h == 256 && b == 5) {
152            throw new IllegalArgumentException("h=256 -> b!=5");
153        }
154        this.b = b;
155        this.h = h;
156        this.s = s;
157        this.d = d;
158        this.l = 256 - h;
159        if (h == 1) {
160            cardinality = b * 255 + 1;
161        } else {
162            cardinality = (long) ((long) (l * (1 - Math.pow(h, b)) / (1 - h)) + Math.pow(h, b));
163        }
164        smallest = calculateSmallest();
165        largest = calculateLargest();
166
167        powers = new long[b];
168        for (int c = 0; c < b; c++) {
169            powers[c] = (long) Math.pow(h, c);
170        }
171    }
172
173    /**
174     * Returns the cardinality of this codec; that is, the number of distinct values that it can contain.
175     *
176     * @return the cardinality of this codec
177     */
178    public long cardinality() {
179        return cardinality;
180    }
181
182    @Override
183    public int decode(final InputStream in) throws IOException, Pack200Exception {
184        if (d != 0) {
185            throw new Pack200Exception("Delta encoding used without passing in last value; this is a coding error");
186        }
187        return decode(in, 0);
188    }
189
190    @Override
191    public int decode(final InputStream in, final long last) throws IOException, Pack200Exception {
192        int n = 0;
193        long z = 0;
194        long x = 0;
195
196        do {
197            x = in.read();
198            lastBandLength++;
199            z += x * powers[n];
200            n++;
201        } while (x >= l && n < b);
202
203        if (x == -1) {
204            throw new EOFException("End of stream reached whilst decoding");
205        }
206
207        if (isSigned()) {
208            final int u = ((1 << s) - 1);
209            if ((z & u) == u) {
210                z = z >>> s ^ -1L;
211            } else {
212                z = z - (z >>> s);
213            }
214        }
215        // This algorithm does the same thing, but is probably slower. Leaving
216        // in for now for readability
217        // if(isSigned()) {
218        // long u = z;
219        // long twoPowS = (long)Math.pow(2, s);
220        // double twoPowSMinusOne = twoPowS-1;
221        // if(u % twoPowS < twoPowSMinusOne) {
222        // if(cardinality < Math.pow(2, 32)) {
223        // z = (long) (u - (Math.floor(u/ twoPowS)));
224        // } else {
225        // z = cast32((long) (u - (Math.floor(u/ twoPowS))));
226        // }
227        // } else {
228        // z = (long) (-Math.floor(u/ twoPowS) - 1);
229        // }
230        // }
231        if (isDelta()) {
232            z += last;
233        }
234        return (int) z;
235    }
236
237    @Override
238    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
239        final int[] band = super.decodeInts(n, in);
240        if (isDelta()) {
241            for (int i = 0; i < band.length; i++) {
242                while (band[i] > largest) {
243                    band[i] -= cardinality;
244                }
245                while (band[i] < smallest) {
246                    band[i] += cardinality;
247                }
248            }
249        }
250        return band;
251    }
252
253    @Override
254    public int[] decodeInts(final int n, final InputStream in, final int firstValue)
255        throws IOException, Pack200Exception {
256        final int[] band = super.decodeInts(n, in, firstValue);
257        if (isDelta()) {
258            for (int i = 0; i < band.length; i++) {
259                while (band[i] > largest) {
260                    band[i] -= cardinality;
261                }
262                while (band[i] < smallest) {
263                    band[i] += cardinality;
264                }
265            }
266        }
267        return band;
268    }
269
270    // private long cast32(long u) {
271    // u = (long) ((long) ((u + Math.pow(2, 31)) % Math.pow(2, 32)) -
272    // Math.pow(2, 31));
273    // return u;
274    // }
275
276    /**
277     * True if this encoding can code the given value
278     *
279     * @param value the value to check
280     * @return <code>true</code> if the encoding can encode this value
281     */
282    public boolean encodes(final long value) {
283        return value >= smallest && value <= largest;
284    }
285
286    @Override
287    public byte[] encode(final int value, final int last) throws Pack200Exception {
288        if (!encodes(value)) {
289            throw new Pack200Exception("The codec " + toString() + " does not encode the value " + value);
290        }
291
292        long z = value;
293        if (isDelta()) {
294            z -= last;
295        }
296        if (isSigned()) {
297            if (z < Integer.MIN_VALUE) {
298                z += 4294967296L;
299            } else if (z > Integer.MAX_VALUE) {
300                z -= 4294967296L;
301            }
302            if (z < 0) {
303                z = (-z << s) - 1;
304            } else if (s == 1) {
305                z = z << s;
306            } else {
307                z += (z - z % 3) / 3;
308            }
309        } else if (z < 0) {
310            // Need to use integer overflow here to represent negatives.
311            if (cardinality < 4294967296L) {
312                z += cardinality;
313            } else {
314                z += 4294967296L; // this value is equal to (1 << 32).
315            }
316        }
317        if (z < 0) {
318            throw new Pack200Exception("unable to encode");
319        }
320
321        final List byteList = new ArrayList();
322        for (int n = 0; n < b; n++) {
323            long byteN;
324            if (z < l) {
325                byteN = z;
326            } else {
327                byteN = z % h;
328                while (byteN < l) {
329                    byteN += h;
330                }
331            }
332            byteList.add(Byte.valueOf((byte) byteN));
333            if (byteN < l) {
334                break;
335            }
336            z -= byteN;
337            z /= h;
338        }
339        final byte[] bytes = new byte[byteList.size()];
340        for (int i = 0; i < bytes.length; i++) {
341            bytes[i] = ((Byte) byteList.get(i)).byteValue();
342        }
343        return bytes;
344    }
345
346    @Override
347    public byte[] encode(final int value) throws Pack200Exception {
348        return encode(value, 0);
349    }
350
351    /**
352     * Returns true if this codec is a delta codec
353     *
354     * @return true if this codec is a delta codec
355     */
356    public boolean isDelta() {
357        return d != 0;
358    }
359
360    /**
361     * Returns true if this codec is a signed codec
362     *
363     * @return true if this codec is a signed codec
364     */
365    public boolean isSigned() {
366        return s != 0;
367    }
368
369    /**
370     * Returns the largest value that this codec can represent.
371     *
372     * @return the largest value that this codec can represent.
373     */
374    public long largest() {
375        return largest;
376    }
377
378    private long calculateLargest() {
379        long result;
380        // TODO This can probably be optimized into a better mathematical
381        // statement
382        if (d == 1) {
383            final BHSDCodec bh0 = new BHSDCodec(b, h);
384            return bh0.largest();
385        }
386        if (s == 0) {
387            result = cardinality() - 1;
388        } else if (s == 1) {
389            result = cardinality() / 2 - 1;
390        } else if (s == 2) {
391            result = (3L * cardinality()) / 4 - 1;
392        } else {
393            throw new Error("Unknown s value");
394        }
395        return Math.min((s == 0 ? ((long) Integer.MAX_VALUE) << 1 : Integer.MAX_VALUE) - 1, result);
396    }
397
398    /**
399     * Returns the smallest value that this codec can represent.
400     *
401     * @return the smallest value that this codec can represent.
402     */
403    public long smallest() {
404        return smallest;
405    }
406
407    private long calculateSmallest() {
408        long result;
409        if (d == 1 || !isSigned()) {
410            if (cardinality >= 4294967296L) { // 2^32
411                result = Integer.MIN_VALUE;
412            } else {
413                result = 0;
414            }
415        } else {
416            result = Math.max(Integer.MIN_VALUE, -cardinality() / (1 << s));
417        }
418        return result;
419    }
420
421    /**
422     * Returns the codec in the form (1,256) or (1,64,1,1). Note that trailing zero fields are not shown.
423     */
424    @Override
425    public String toString() {
426        final StringBuffer buffer = new StringBuffer(11);
427        buffer.append('(');
428        buffer.append(b);
429        buffer.append(',');
430        buffer.append(h);
431        if (s != 0 || d != 0) {
432            buffer.append(',');
433            buffer.append(s);
434        }
435        if (d != 0) {
436            buffer.append(',');
437            buffer.append(d);
438        }
439        buffer.append(')');
440        return buffer.toString();
441    }
442
443    /**
444     * @return the b
445     */
446    public int getB() {
447        return b;
448    }
449
450    /**
451     * @return the h
452     */
453    public int getH() {
454        return h;
455    }
456
457    /**
458     * @return the s
459     */
460    public int getS() {
461        return s;
462    }
463
464    /**
465     * @return the l
466     */
467    public int getL() {
468        return l;
469    }
470
471    @Override
472    public boolean equals(final Object o) {
473        if (o instanceof BHSDCodec) {
474            final BHSDCodec codec = (BHSDCodec) o;
475            return codec.b == b && codec.h == h && codec.s == s && codec.d == d;
476        }
477        return false;
478    }
479
480    @Override
481    public int hashCode() {
482        return ((b * 37 + h) * 37 + s) * 37 + d;
483    }
484}