View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collection;
24  import java.util.Iterator;
25  import java.util.List;
26  
27  import static org.junit.Assert.assertEquals;
28  import static org.junit.Assert.assertFalse;
29  import static org.junit.Assert.assertTrue;
30  
31  import org.junit.Before;
32  import org.junit.Test;
33  
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataOutputStream;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.Cell;
39  import org.apache.hadoop.hbase.CellUtil;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.KeyValue;
43  import org.apache.hadoop.hbase.KeyValueUtil;
44  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
45  import org.apache.hadoop.hbase.testclassification.SmallTests;
46  import org.apache.hadoop.hbase.Tag;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.junit.experimental.categories.Category;
49  import org.junit.runner.RunWith;
50  import org.junit.runners.Parameterized;
51  import org.junit.runners.Parameterized.Parameters;
52  
53  /**
54   * Test {@link HFileScanner#seekTo(byte[])} and its variants.
55   */
56  @Category(SmallTests.class)
57  @RunWith(Parameterized.class)
58  public class TestSeekTo {
59  
60    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
61    private final DataBlockEncoding encoding;
62  
63    @Parameters
64    public static Collection<Object[]> parameters() {
65      List<Object[]> paramList = new ArrayList<Object[]>();
66      for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
67        paramList.add(new Object[] { encoding });
68      }
69      return paramList;
70    }
71  
72    static boolean switchKVs = false;
73  
74    public TestSeekTo(DataBlockEncoding encoding) {
75      this.encoding = encoding;
76    }
77     
78     @Before
79     public void setUp() {
80       //reset
81       switchKVs = false;
82     }
83  
84    static KeyValue toKV(String row, TagUsage tagUsage) {
85      if (tagUsage == TagUsage.NO_TAG) {
86        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
87            Bytes.toBytes("value"));
88      } else if (tagUsage == TagUsage.ONLY_TAG) {
89        Tag t = new Tag((byte) 1, "myTag1");
90        Tag[] tags = new Tag[1];
91        tags[0] = t;
92        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
93            HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
94      } else {
95        if (!switchKVs) {
96          switchKVs = true;
97          return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
98              Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
99        } else {
100         switchKVs = false;
101         Tag t = new Tag((byte) 1, "myTag1");
102         Tag[] tags = new Tag[1];
103         tags[0] = t;
104         return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
105             Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
106       }
107     }
108   }
109 
110   static String toRowStr(Cell kv) {
111     return Bytes.toString(KeyValueUtil.ensureKeyValue(kv).getRow());
112   }
113 
114   Path makeNewFile(TagUsage tagUsage) throws IOException {
115     Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
116     FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
117     Configuration conf = TEST_UTIL.getConfiguration();
118     if (tagUsage != TagUsage.NO_TAG) {
119       conf.setInt("hfile.format.version", 3);
120     } else {
121       conf.setInt("hfile.format.version", 2);
122     }
123     int blocksize = toKV("a", tagUsage).getLength() * 3;
124     HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
125         .withDataBlockEncoding(encoding)
126         .withIncludesTags(true).build();
127     HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout)
128         .withFileContext(context)
129         .withComparator(KeyValue.COMPARATOR).create();
130     // 4 bytes * 3 * 2 for each key/value +
131     // 3 for keys, 15 for values = 42 (woot)
132     writer.append(toKV("c", tagUsage));
133     writer.append(toKV("e", tagUsage));
134     writer.append(toKV("g", tagUsage));
135     // block transition
136     writer.append(toKV("i", tagUsage));
137     writer.append(toKV("k", tagUsage));
138     writer.close();
139     fout.close();
140     return ncTFile;
141   }
142 
143   @Test
144   public void testSeekBefore() throws Exception {
145     testSeekBeforeInternals(TagUsage.NO_TAG);
146     testSeekBeforeInternals(TagUsage.ONLY_TAG);
147     testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
148   }
149 
150   protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
151     Path p = makeNewFile(tagUsage);
152     FileSystem fs = TEST_UTIL.getTestFileSystem();
153     Configuration conf = TEST_UTIL.getConfiguration();
154     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
155     reader.loadFileInfo();
156     HFileScanner scanner = reader.getScanner(false, true);
157     assertEquals(false, scanner.seekBefore(toKV("a", tagUsage)));
158 
159     assertEquals(false, scanner.seekBefore(toKV("c", tagUsage)));
160 
161     assertEquals(true, scanner.seekBefore(toKV("d", tagUsage)));
162     assertEquals("c", toRowStr(scanner.getKeyValue()));
163 
164     assertEquals(true, scanner.seekBefore(toKV("e", tagUsage)));
165     assertEquals("c", toRowStr(scanner.getKeyValue()));
166 
167     assertEquals(true, scanner.seekBefore(toKV("f", tagUsage)));
168     assertEquals("e", toRowStr(scanner.getKeyValue()));
169 
170     assertEquals(true, scanner.seekBefore(toKV("g", tagUsage)));
171     assertEquals("e", toRowStr(scanner.getKeyValue()));
172 
173     assertEquals(true, scanner.seekBefore(toKV("h", tagUsage)));
174     assertEquals("g", toRowStr(scanner.getKeyValue()));
175     assertEquals(true, scanner.seekBefore(toKV("i", tagUsage)));
176     assertEquals("g", toRowStr(scanner.getKeyValue()));
177     assertEquals(true, scanner.seekBefore(toKV("j", tagUsage)));
178     assertEquals("i", toRowStr(scanner.getKeyValue()));
179     Cell cell = scanner.getKeyValue();
180     if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
181       Iterator<Tag> tagsIterator = CellUtil.tagsIterator(cell.getTagsArray(), cell.getTagsOffset(),
182           cell.getTagsLength());
183       while (tagsIterator.hasNext()) {
184         Tag next = tagsIterator.next();
185         assertEquals("myTag1", Bytes.toString(next.getValue()));
186       }
187     }
188     assertEquals(true, scanner.seekBefore(toKV("k", tagUsage)));
189     assertEquals("i", toRowStr(scanner.getKeyValue()));
190     assertEquals(true, scanner.seekBefore(toKV("l", tagUsage)));
191     assertEquals("k", toRowStr(scanner.getKeyValue()));
192 
193     reader.close();
194     deleteTestDir(fs);
195   }
196 
197   @Test
198   public void testSeekBeforeWithReSeekTo() throws Exception {
199     testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
200     testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
201     testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
202   }
203 
204   protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
205     Path p = makeNewFile(tagUsage);
206     FileSystem fs = TEST_UTIL.getTestFileSystem();
207     Configuration conf = TEST_UTIL.getConfiguration();
208     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
209     reader.loadFileInfo();
210     HFileScanner scanner = reader.getScanner(false, true);
211     assertEquals(false, scanner.seekBefore(toKV("a", tagUsage)));
212     assertEquals(false, scanner.seekBefore(toKV("b", tagUsage)));
213     assertEquals(false, scanner.seekBefore(toKV("c", tagUsage)));
214 
215     // seekBefore d, so the scanner points to c
216     assertEquals(true, scanner.seekBefore(toKV("d", tagUsage)));
217     assertEquals("c", toRowStr(scanner.getKeyValue()));
218     // reseekTo e and g
219     assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
220     assertEquals("c", toRowStr(scanner.getKeyValue()));
221     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
222     assertEquals("g", toRowStr(scanner.getKeyValue()));
223 
224     // seekBefore e, so the scanner points to c
225     assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
226     assertEquals("c", toRowStr(scanner.getKeyValue()));
227     // reseekTo e and g
228     assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
229     assertEquals("e", toRowStr(scanner.getKeyValue()));
230     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
231     assertEquals("g", toRowStr(scanner.getKeyValue()));
232 
233     // seekBefore f, so the scanner points to e
234     assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
235     assertEquals("e", toRowStr(scanner.getKeyValue()));
236     // reseekTo e and g
237     assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
238     assertEquals("e", toRowStr(scanner.getKeyValue()));
239     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
240     assertEquals("g", toRowStr(scanner.getKeyValue()));
241 
242     // seekBefore g, so the scanner points to e
243     assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
244     assertEquals("e", toRowStr(scanner.getKeyValue()));
245     // reseekTo e and g again
246     assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
247     assertEquals("e", toRowStr(scanner.getKeyValue()));
248     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
249     assertEquals("g", toRowStr(scanner.getKeyValue()));
250 
251     // seekBefore h, so the scanner points to g
252     assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
253     assertEquals("g", toRowStr(scanner.getKeyValue()));
254     // reseekTo g
255     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
256     assertEquals("g", toRowStr(scanner.getKeyValue()));
257 
258     // seekBefore i, so the scanner points to g
259     assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
260     assertEquals("g", toRowStr(scanner.getKeyValue()));
261     // reseekTo g
262     assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
263     assertEquals("g", toRowStr(scanner.getKeyValue()));
264 
265     // seekBefore j, so the scanner points to i
266     assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
267     assertEquals("i", toRowStr(scanner.getKeyValue()));
268     // reseekTo i
269     assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
270     assertEquals("i", toRowStr(scanner.getKeyValue()));
271 
272     // seekBefore k, so the scanner points to i
273     assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
274     assertEquals("i", toRowStr(scanner.getKeyValue()));
275     // reseekTo i and k
276     assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
277     assertEquals("i", toRowStr(scanner.getKeyValue()));
278     assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
279     assertEquals("k", toRowStr(scanner.getKeyValue()));
280 
281     // seekBefore l, so the scanner points to k
282     assertEquals(true, scanner.seekBefore(toKV("l", tagUsage)));
283     assertEquals("k", toRowStr(scanner.getKeyValue()));
284     // reseekTo k
285     assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
286     assertEquals("k", toRowStr(scanner.getKeyValue()));
287     deleteTestDir(fs);
288   }
289 
290   @Test
291   public void testSeekTo() throws Exception {
292     testSeekToInternals(TagUsage.NO_TAG);
293     testSeekToInternals(TagUsage.ONLY_TAG);
294     testSeekToInternals(TagUsage.PARTIAL_TAG);
295   }
296 
297   protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
298     Path p = makeNewFile(tagUsage);
299     FileSystem fs = TEST_UTIL.getTestFileSystem();
300     Configuration conf = TEST_UTIL.getConfiguration();
301     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
302     reader.loadFileInfo();
303     assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
304     HFileScanner scanner = reader.getScanner(false, true);
305     // lies before the start of the file.
306     assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
307 
308     assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
309     assertEquals("c", toRowStr(scanner.getKeyValue()));
310 
311     // Across a block boundary now.
312     // 'h' does not exist so we will get a '1' back for not found.
313     assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
314     assertEquals("i", toRowStr(scanner.getKeyValue()));
315     assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
316     if (encoding == DataBlockEncoding.PREFIX_TREE) {
317       // TODO : Fix this
318       assertEquals(null, scanner.getKeyValue());
319     } else {
320       assertEquals("k", toRowStr(scanner.getKeyValue()));
321     }
322 
323     reader.close();
324     deleteTestDir(fs);
325   }
326 
327   @Test
328   public void testBlockContainingKey() throws Exception {
329     testBlockContainingKeyInternals(TagUsage.NO_TAG);
330     testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
331     testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
332   }
333 
334   protected void deleteTestDir(FileSystem fs) throws IOException {
335     Path dataTestDir = TEST_UTIL.getDataTestDir();
336     if (fs.exists(dataTestDir)) {
337       fs.delete(dataTestDir, true);
338     }
339   }
340   protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
341     Path p = makeNewFile(tagUsage);
342     FileSystem fs = TEST_UTIL.getTestFileSystem();
343     Configuration conf = TEST_UTIL.getConfiguration();
344     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
345     reader.loadFileInfo();
346     HFileBlockIndex.BlockIndexReader blockIndexReader = 
347       reader.getDataBlockIndexReader();
348     System.out.println(blockIndexReader.toString());
349     // falls before the start of the file.
350     assertEquals(-1, blockIndexReader.rootBlockContainingKey(
351         toKV("a", tagUsage)));
352     assertEquals(0, blockIndexReader.rootBlockContainingKey(
353         toKV("c", tagUsage)));
354     assertEquals(0, blockIndexReader.rootBlockContainingKey(
355         toKV("d", tagUsage)));
356     assertEquals(0, blockIndexReader.rootBlockContainingKey(
357         toKV("e", tagUsage)));
358     assertEquals(0, blockIndexReader.rootBlockContainingKey(
359         toKV("g", tagUsage)));
360     assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
361     assertEquals(1, blockIndexReader.rootBlockContainingKey(
362         toKV("i", tagUsage)));
363     assertEquals(1, blockIndexReader.rootBlockContainingKey(
364         toKV("j", tagUsage)));
365     assertEquals(1, blockIndexReader.rootBlockContainingKey(
366         toKV("k", tagUsage)));
367     assertEquals(1, blockIndexReader.rootBlockContainingKey(
368         toKV("l", tagUsage)));
369     reader.close();
370     deleteTestDir(fs);
371   }
372 }