View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataOutputStream;
24  import java.nio.ByteBuffer;
25  
26  import junit.framework.TestCase;
27  import org.apache.hadoop.hbase.testclassification.SmallTests;
28  import org.junit.experimental.categories.Category;
29  
30  @Category(SmallTests.class)
31  public class TestByteBloomFilter extends TestCase {
32  
33    public void testBasicBloom() throws Exception {
34      ByteBloomFilter bf1 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
35      ByteBloomFilter bf2 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
36      bf1.allocBloom();
37      bf2.allocBloom();
38  
39      // test 1: verify no fundamental false negatives or positives
40      byte[] key1 = {1,2,3,4,5,6,7,8,9};
41      byte[] key2 = {1,2,3,4,5,6,7,8,7};
42  
43      bf1.add(key1);
44      bf2.add(key2);
45  
46      assertTrue(bf1.contains(key1));
47      assertFalse(bf1.contains(key2));
48      assertFalse(bf2.contains(key1));
49      assertTrue(bf2.contains(key2));
50  
51      byte [] bkey = {1,2,3,4};
52      byte [] bval = "this is a much larger byte array".getBytes();
53  
54      bf1.add(bkey);
55      bf1.add(bval, 1, bval.length-1);
56  
57      assertTrue( bf1.contains(bkey) );
58      assertTrue( bf1.contains(bval, 1, bval.length-1) );
59      assertFalse( bf1.contains(bval) );
60      assertFalse( bf1.contains(bval) );
61  
62      // test 2: serialization & deserialization.
63      // (convert bloom to byte array & read byte array back in as input)
64      ByteArrayOutputStream bOut = new ByteArrayOutputStream();
65      bf1.writeBloom(new DataOutputStream(bOut));
66      ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
67      ByteBloomFilter newBf1 = new ByteBloomFilter(1000, (float)0.01,
68          Hash.MURMUR_HASH, 0);
69      assertTrue(newBf1.contains(key1, bb));
70      assertFalse(newBf1.contains(key2, bb));
71      assertTrue( newBf1.contains(bkey, bb) );
72      assertTrue( newBf1.contains(bval, 1, bval.length-1, bb) );
73      assertFalse( newBf1.contains(bval, bb) );
74      assertFalse( newBf1.contains(bval, bb) );
75  
76      System.out.println("Serialized as " + bOut.size() + " bytes");
77      assertTrue(bOut.size() - bf1.byteSize < 10); //... allow small padding
78    }
79  
80    public void testBloomFold() throws Exception {
81      // test: foldFactor < log(max/actual)
82      ByteBloomFilter b = new ByteBloomFilter(1003, (float) 0.01,
83          Hash.MURMUR_HASH, 2);
84      b.allocBloom();
85      long origSize = b.getByteSize();
86      assertEquals(1204, origSize);
87      for (int i = 0; i < 12; ++i) {
88        b.add(Bytes.toBytes(i));
89      }
90      b.compactBloom();
91      assertEquals(origSize>>2, b.getByteSize());
92      int falsePositives = 0;
93      for (int i = 0; i < 25; ++i) {
94        if (b.contains(Bytes.toBytes(i))) {
95          if(i >= 12) falsePositives++;
96        } else {
97          assertFalse(i < 12);
98        }
99      }
100     assertTrue(falsePositives <= 1);
101 
102     // test: foldFactor > log(max/actual)
103   }
104 
105   public void testBloomPerf() throws Exception {
106     // add
107     float err = (float)0.01;
108     ByteBloomFilter b = new ByteBloomFilter(10*1000*1000, (float)err, Hash.MURMUR_HASH, 3);
109     b.allocBloom();
110     long startTime =  System.currentTimeMillis();
111     long origSize = b.getByteSize();
112     for (int i = 0; i < 1*1000*1000; ++i) {
113       b.add(Bytes.toBytes(i));
114     }
115     long endTime = System.currentTimeMillis();
116     System.out.println("Total Add time = " + (endTime - startTime) + "ms");
117 
118     // fold
119     startTime = System.currentTimeMillis();
120     b.compactBloom();
121     endTime = System.currentTimeMillis();
122     System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
123     assertTrue(origSize >= b.getByteSize()<<3);
124 
125     // test
126     startTime = System.currentTimeMillis();
127     int falsePositives = 0;
128     for (int i = 0; i < 2*1000*1000; ++i) {
129 
130       if (b.contains(Bytes.toBytes(i))) {
131         if(i >= 1*1000*1000) falsePositives++;
132       } else {
133         assertFalse(i < 1*1000*1000);
134       }
135     }
136     endTime = System.currentTimeMillis();
137     System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
138     System.out.println("False Positive = " + falsePositives);
139     assertTrue(falsePositives <= (1*1000*1000)*err);
140 
141     // test: foldFactor > log(max/actual)
142   }
143 
144   public void testSizing() {
145     int bitSize = 8 * 128 * 1024; // 128 KB
146     double errorRate = 0.025; // target false positive rate
147 
148     // How many keys can we store in a Bloom filter of this size maintaining
149     // the given false positive rate, not taking into account that the n
150     long maxKeys = ByteBloomFilter.idealMaxKeys(bitSize, errorRate);
151     assertEquals(136570, maxKeys);
152 
153     // A reverse operation: how many bits would we need to store this many keys
154     // and keep the same low false positive rate?
155     long bitSize2 = ByteBloomFilter.computeBitSize(maxKeys, errorRate);
156 
157     // The bit size comes out a little different due to rounding.
158     assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5);
159   }
160 
161   public void testFoldableByteSize() {
162     assertEquals(128, ByteBloomFilter.computeFoldableByteSize(1000, 5));
163     assertEquals(640, ByteBloomFilter.computeFoldableByteSize(5001, 4));
164   }
165 
166 
167 }
168