1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.filter;
20
21 import static org.junit.Assert.*;
22
23 import java.util.regex.Pattern;
24
25 import org.apache.hadoop.hbase.testclassification.SmallTests;
26 import org.apache.hadoop.hbase.filter.RegexStringComparator.EngineType;
27 import org.apache.hadoop.hbase.util.Bytes;
28 import org.junit.Test;
29 import org.junit.experimental.categories.Category;
30
31 @Category(SmallTests.class)
32 public class TestRegexComparator {
33
34 @Test
35 public void testSerialization() throws Exception {
36
37 RegexStringComparator a = new RegexStringComparator("a|b");
38 RegexStringComparator b = RegexStringComparator.parseFrom(a.toByteArray());
39 assertTrue(a.areSerializedFieldsEqual(b));
40 assertTrue(b.getEngine() instanceof RegexStringComparator.JavaRegexEngine);
41
42
43 a = new RegexStringComparator("a|b", EngineType.JONI);
44 b = RegexStringComparator.parseFrom(a.toByteArray());
45 assertTrue(a.areSerializedFieldsEqual(b));
46 assertTrue(b.getEngine() instanceof RegexStringComparator.JoniRegexEngine);
47 }
48
49 @Test
50 public void testJavaEngine() throws Exception {
51 for (TestCase t: TEST_CASES) {
52 boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JAVA)
53 .compareTo(Bytes.toBytes(t.haystack)) == 0;
54 assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
55 t.expected);
56 }
57 }
58
59 @Test
60 public void testJoniEngine() throws Exception {
61 for (TestCase t: TEST_CASES) {
62 boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JONI)
63 .compareTo(Bytes.toBytes(t.haystack)) == 0;
64 assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
65 t.expected);
66 }
67 }
68
69 private static class TestCase {
70 String regex;
71 String haystack;
72 int flags;
73 boolean expected;
74
75 public TestCase(String regex, String haystack, boolean expected) {
76 this(regex, Pattern.DOTALL, haystack, expected);
77 }
78
79 public TestCase(String regex, int flags, String haystack, boolean expected) {
80 this.regex = regex;
81 this.flags = flags;
82 this.haystack = haystack;
83 this.expected = expected;
84 }
85 }
86
87
88 private static TestCase TEST_CASES[] = {
89 new TestCase("a|b", "a", true),
90 new TestCase("a|b", "b", true),
91 new TestCase("a|b", Pattern.CASE_INSENSITIVE, "A", true),
92 new TestCase("a|b", Pattern.CASE_INSENSITIVE, "B", true),
93 new TestCase("a|b", "z", false),
94 new TestCase("a|b|cd", "cd", true),
95 new TestCase("z(a|ac)b", "zacb", true),
96 new TestCase("[abc]+", "ababab", true),
97 new TestCase("[abc]+", "defg", false),
98 new TestCase("[abc]+[def]+[ghi]+", "zzzaaddggzzz", true),
99 new TestCase("[a-\\u4444]+", "za-9z", true),
100 new TestCase("[^abc]+", "ababab", false),
101 new TestCase("[^abc]+", "aaabbbcccdefg", true),
102 new TestCase("[abc^b]", "b", true),
103 new TestCase("[abc[def]]", "b", true),
104 new TestCase("[abc[def]]", "e", true),
105 new TestCase("[a-c[d-f[g-i]]]", "h", true),
106 new TestCase("[a-c[d-f[g-i]]m]", "m", true),
107 new TestCase("[a-c&&[d-f]]", "a", false),
108 new TestCase("[a-c&&[d-f]]", "z", false),
109 new TestCase("[a-m&&m-z&&a-c]", "m", false),
110 new TestCase("[a-m&&m-z&&a-z]", "m", true),
111 new TestCase("[[a-m]&&[^a-c]]", "a", false),
112 new TestCase("[[a-m]&&[^a-c]]", "d", true),
113 new TestCase("[[a-c][d-f]&&abc[def]]", "e", true),
114 new TestCase("[[a-c]&&[b-d]&&[c-e]]", "c", true),
115 new TestCase("[[a-c]&&[b-d][c-e]&&[u-z]]", "c", false),
116 new TestCase("[[a]&&[b][c][a]&&[^d]]", "a", true),
117 new TestCase("[[a]&&[b][c][a]&&[^d]]", "d", false),
118 new TestCase("[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]", "c", true),
119 new TestCase("[x[[wz]abc&&bcd[z]]&&[u-z]]", "z", true),
120 new TestCase("a.c.+", "a#c%&", true),
121 new TestCase("ab.", "ab\n", true),
122 new TestCase("(?s)ab.", "ab\n", true),
123 new TestCase("ab\\wc", "abcc", true),
124 new TestCase("\\W\\w\\W", "#r#", true),
125 new TestCase("\\W\\w\\W", "rrrr#ggg", false),
126 new TestCase("abc[\\sdef]*", "abc def", true),
127 new TestCase("abc[\\sy-z]*", "abc y z", true),
128 new TestCase("abc[a-d\\sm-p]*", "abcaa mn p", true),
129 new TestCase("\\s\\s\\s", "blah err", false),
130 new TestCase("\\S\\S\\s", "blah err", true),
131 new TestCase("ab\\dc", "ab9c", true),
132 new TestCase("\\d\\d\\d", "blah45", false),
133 new TestCase("^abc", "abcdef", true),
134 new TestCase("^abc", "bcdabc", false),
135 new TestCase("^(a)?a", "a", true),
136 new TestCase("^(aa(bb)?)+$", "aabbaa", true),
137 new TestCase("((a|b)?b)+", "b", true),
138 new TestCase("^(a(b)?)+$", "aba", true),
139 new TestCase("^(a(b(c)?)?)?abc", "abc", true),
140 new TestCase("^(a(b(c))).*", "abc", true),
141 new TestCase("a?b", "aaaab", true),
142 new TestCase("a?b", "aaacc", false),
143 new TestCase("a??b", "aaaab", true),
144 new TestCase("a??b", "aaacc", false),
145 new TestCase("a?+b", "aaaab", true),
146 new TestCase("a?+b", "aaacc", false),
147 new TestCase("a+b", "aaaab", true),
148 new TestCase("a+b", "aaacc", false),
149 new TestCase("a+?b", "aaaab", true),
150 new TestCase("a+?b", "aaacc", false),
151 new TestCase("a++b", "aaaab", true),
152 new TestCase("a++b", "aaacc", false),
153 new TestCase("a{2,3}", "a", false),
154 new TestCase("a{2,3}", "aa", true),
155 new TestCase("a{2,3}", "aaa", true),
156 new TestCase("a{3,}", "zzzaaaazzz", true),
157 new TestCase("a{3,}", "zzzaazzz", false),
158 new TestCase("abc(?=d)", "zzzabcd", true),
159 new TestCase("abc(?=d)", "zzzabced", false),
160 new TestCase("abc(?!d)", "zzabcd", false),
161 new TestCase("abc(?!d)", "zzabced", true),
162 new TestCase("\\w(?<=a)", "###abc###", true),
163 new TestCase("\\w(?<=a)", "###ert###", false),
164 new TestCase("(?<!a)c", "bc", true),
165 new TestCase("(?<!a)c", "ac", false),
166 new TestCase("(a+b)+", "ababab", true),
167 new TestCase("(a+b)+", "accccd", false),
168 new TestCase("(ab)+", "ababab", true),
169 new TestCase("(ab)+", "accccd", false),
170 new TestCase("(ab)(cd*)", "zzzabczzz", true),
171 new TestCase("abc(d)*abc", "abcdddddabc", true),
172 new TestCase("a*b", "aaaab", true),
173 new TestCase("a*b", "b", true),
174 new TestCase("a*b", "aaaac", false),
175 new TestCase(".*?b", "aaaab", true),
176 new TestCase("a*+b", "aaaab", true),
177 new TestCase("a*+b", "b", true),
178 new TestCase("a*+b", "aaaac", false),
179 new TestCase("(?i)foobar", "fOobAr", true),
180 new TestCase("f(?i)oobar", "fOobAr", true),
181 new TestCase("f(?i)oobar", "FOobAr", false),
182 new TestCase("foo(?i)bar", "fOobAr", false),
183 new TestCase("(?i)foo[bar]+", "foObAr", true),
184 new TestCase("(?i)foo[a-r]+", "foObAr", true),
185 new TestCase("abc(?x)blah", "abcblah", true),
186 new TestCase("abc(?x) blah", "abcblah", true),
187 new TestCase("abc(?x) blah blech", "abcblahblech", true),
188 new TestCase("[\\n-#]", "!", true),
189 new TestCase("[\\n-#]", "-", false),
190 new TestCase("[\\043]+", "blahblah#blech", true),
191 new TestCase("[\\042-\\044]+", "blahblah#blech", true),
192 new TestCase("[\\u1234-\\u1236]", "blahblah\u1235blech", true),
193 new TestCase("[^\043]*", "blahblah#blech", true),
194 new TestCase("(|f)?+", "foo", true),
195 };
196 }