qLibc
qhash.c
Go to the documentation of this file.
1/******************************************************************************
2 * qLibc
3 *
4 * Copyright (c) 2010-2015 Seungyoung Kim.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29/**
30 * @file qhash.c Hash APIs.
31 */
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <stdbool.h>
36#include <stdint.h>
37#include <string.h>
38#include <unistd.h>
39#include <fcntl.h>
40#include <errno.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43#include "md5/md5.h"
44#include "qinternal.h"
45#include "utilities/qhash.h"
46
47/**
48 * Calculate 128-bit(16-bytes) MD5 hash.
49 *
50 * @param data source object
51 * @param nbytes size of data
52 * @param retbuf user buffer. It must be at leat 16-bytes long.
53 *
54 * @return true if successful, otherwise false.
55 *
56 * @code
57 * // get MD5
58 * unsigned char md5hash[16];
59 * qhashmd5((void*)"hello", 5, md5hash);
60 *
61 * // hex encode
62 * char *md5ascii = qhex_encode(md5hash, 16);
63 * printf("Hex encoded MD5: %s\n", md5ascii);
64 * free(md5ascii);
65 * @endcode
66 */
67bool qhashmd5(const void *data, size_t nbytes, void *retbuf) {
68 if (data == NULL || retbuf == NULL) {
69 errno = EINVAL;
70 return false;
71 }
72
73 MD5_CTX context;
74 MD5Init(&context);
75 MD5Update(&context, (unsigned char *) data, (unsigned int) nbytes);
76 MD5Final(retbuf, &context);
77
78 return true;
79}
80
81/**
82 * Get 128-bit MD5 hash of a file contents.
83 *
84 * @param filepath file path
85 * @param offset start offset. Set to 0 to digest from beginning of file.
86 * @param nbytes number of bytes to digest. Set to 0 to digest until end
87 * of file.
88 * @param retbuf user buffer. It must be at leat 16-bytes long.
89 *
90 * @return true if successful, otherwise false.
91 *
92 * @code
93 * unsigned char md5hash[16];
94 * qhashmd5_file("/tmp/test.dat", 0, 0, md5hash);
95 * @endcode
96 */
97bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes,
98 void *retbuf) {
99 if (filepath == NULL || offset < 0 || nbytes < 0 || retbuf == NULL) {
100 errno = EINVAL;
101 return false;
102 }
103
104 int fd = open(filepath, O_RDONLY, 0);
105 if (fd < 0)
106 return false;
107
108 struct stat st;
109 if (fstat(fd, &st) < 0)
110 return false;
111 size_t size = st.st_size;
112
113 // check filesize
114 if (size < offset + nbytes) {
115 errno = EINVAL;
116 close(fd);
117 return false;
118 }
119
120 // if requested to digest until the end of file, set nbytes to the remaining size
121 if (nbytes == 0) {
122 nbytes = size - offset;
123 }
124
125 // move the seek pointer to the beginning of the offset
126 if (offset > 0) {
127 if (lseek(fd, offset, SEEK_SET) != offset) {
128 close(fd);
129 return false;
130 }
131 }
132
133 MD5_CTX context;
134 MD5Init(&context);
135 ssize_t toread, nread;
136 unsigned char buf[32 * 1024];
137 for (toread = nbytes; toread > 0; toread -= nread) {
138 if (toread > sizeof(buf))
139 nread = read(fd, buf, sizeof(buf));
140 else
141 nread = read(fd, buf, toread);
142 if (nread < 0)
143 break;
144 MD5Update(&context, buf, nread);
145 }
146 close(fd);
147 if (toread != 0)
148 return false;
149 MD5Final(retbuf, &context);
150
151 return true;
152}
153
154/**
155 * Get 32-bit FNV1 hash.
156 *
157 * @param data source data
158 * @param nbytes size of data
159 *
160 * @return 32-bit unsigned hash value.
161 *
162 * @code
163 * uint32_t hashval = qhashfnv1_32((void*)"hello", 5);
164 * @endcode
165 *
166 * @code
167 * Fowler/Noll/Vo hash
168 *
169 * The basis of this hash algorithm was taken from an idea sent as reviewer
170 * comments to the IEEE POSIX P1003.2 committee by:
171 *
172 * Phong Vo (http://www.research.att.com/info/kpv/)
173 * Glenn Fowler (http://www.research.att.com/~gsf/)
174 *
175 * In a subsequent ballot round:
176 *
177 * Landon Curt Noll (http://www.isthe.com/chongo/)
178 *
179 * improved on their algorithm. Some people tried this hash and found that
180 * it worked rather well. In an EMail message to Landon, they named it the
181 * ``Fowler/Noll/Vo'' or FNV hash.
182 *
183 * FNV hashes are designed to be fast while maintaining a low collision rate.
184 * The FNV speed allows one to quickly hash lots of data while maintaining
185 * a reasonable collision rate. See:
186 *
187 * http://www.isthe.com/chongo/tech/comp/fnv/index.html
188 *
189 * for more details as well as other forms of the FNV hash.
190 * @endcode
191 */
192uint32_t qhashfnv1_32(const void *data, size_t nbytes) {
193 if (data == NULL || nbytes == 0)
194 return 0;
195
196 unsigned char *dp;
197 uint32_t h = 0x811C9DC5;
198
199 for (dp = (unsigned char *) data; *dp && nbytes > 0; dp++, nbytes--) {
200#ifdef __GNUC__
201 h += (h<<1) + (h<<4) + (h<<7) + (h<<8) + (h<<24);
202#else
203 h *= 0x01000193;
204#endif
205 h ^= *dp;
206 }
207
208 return h;
209}
210
211/**
212 * Get 64-bit FNV1 hash integer.
213 *
214 * @param data source data
215 * @param nbytes size of data
216 *
217 * @return 64-bit unsigned hash value.
218 *
219 * @code
220 * uint64_t fnv64 = qhashfnv1_64((void*)"hello", 5);
221 * @endcode
222 */
223uint64_t qhashfnv1_64(const void *data, size_t nbytes) {
224 if (data == NULL || nbytes == 0)
225 return 0;
226
227 unsigned char *dp;
228 uint64_t h = 0xCBF29CE484222325ULL;
229
230 for (dp = (unsigned char *) data; *dp && nbytes > 0; dp++, nbytes--) {
231#ifdef __GNUC__
232 h += (h << 1) + (h << 4) + (h << 5) +
233 (h << 7) + (h << 8) + (h << 40);
234#else
235 h *= 0x100000001B3ULL;
236#endif
237 h ^= *dp;
238 }
239
240 return h;
241}
242
243/**
244 * Get 32-bit Murmur3 hash.
245 *
246 * @param data source data
247 * @param nbytes size of data
248 *
249 * @return 32-bit unsigned hash value.
250 *
251 * @code
252 * uint32_t hashval = qhashmurmur3_32((void*)"hello", 5);
253 * @endcode
254 *
255 * @code
256 * MurmurHash3 was created by Austin Appleby in 2008. The initial
257 * implementation was published in C++ and placed in the public.
258 * https://sites.google.com/site/murmurhash/
259 * Seungyoung Kim has ported its implementation into C language
260 * in 2012 and published it as a part of qLibc component.
261 * @endcode
262 */
263uint32_t qhashmurmur3_32(const void *data, size_t nbytes) {
264 if (data == NULL || nbytes == 0)
265 return 0;
266
267 const uint32_t c1 = 0xcc9e2d51;
268 const uint32_t c2 = 0x1b873593;
269
270 const int nblocks = nbytes / 4;
271 const uint32_t *blocks = (const uint32_t *) (data);
272 const uint8_t *tail = (const uint8_t *) (data + (nblocks * 4));
273
274 uint32_t h = 0;
275
276 int i;
277 uint32_t k;
278 for (i = 0; i < nblocks; i++) {
279 k = blocks[i];
280
281 k *= c1;
282 k = (k << 15) | (k >> (32 - 15));
283 k *= c2;
284
285 h ^= k;
286 h = (h << 13) | (h >> (32 - 13));
287 h = (h * 5) + 0xe6546b64;
288 }
289
290 k = 0;
291 switch (nbytes & 3) {
292 case 3:
293 k ^= tail[2] << 16;
294 case 2:
295 k ^= tail[1] << 8;
296 case 1:
297 k ^= tail[0];
298 k *= c1;
299 k = (k << 15) | (k >> (32 - 15));
300 k *= c2;
301 h ^= k;
302 };
303
304 h ^= nbytes;
305
306 h ^= h >> 16;
307 h *= 0x85ebca6b;
308 h ^= h >> 13;
309 h *= 0xc2b2ae35;
310 h ^= h >> 16;
311
312 return h;
313}
314
315/**
316 * Get 128-bit Murmur3 hash.
317 *
318 * @param data source data
319 * @param nbytes size of data
320 * @param retbuf user buffer. It must be at leat 16-bytes long.
321 *
322 * @return true if successful, otherwise false.
323 *
324 * @code
325 * // get 128-bit Murmur3 hash.
326 * unsigned char hash[16];
327 * qhashmurmur3_128((void*)"hello", 5, hash);
328 *
329 * // hex encode
330 * char *ascii = qhex_encode(hash, 16);
331 * printf("Hex encoded Murmur3: %s\n", ascii);
332 * free(ascii);
333 * @endcode
334 */
335bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf) {
336 if (data == NULL || nbytes == 0)
337 return false;
338
339 const uint64_t c1 = 0x87c37b91114253d5ULL;
340 const uint64_t c2 = 0x4cf5ad432745937fULL;
341
342 const int nblocks = nbytes / 16;
343 const uint64_t *blocks = (const uint64_t *) (data);
344 const uint8_t *tail = (const uint8_t *) (data + (nblocks * 16));
345
346 uint64_t h1 = 0;
347 uint64_t h2 = 0;
348
349 int i;
350 uint64_t k1, k2;
351 for (i = 0; i < nblocks; i++) {
352 k1 = blocks[i * 2 + 0];
353 k2 = blocks[i * 2 + 1];
354
355 k1 *= c1;
356 k1 = (k1 << 31) | (k1 >> (64 - 31));
357 k1 *= c2;
358 h1 ^= k1;
359
360 h1 = (h1 << 27) | (h1 >> (64 - 27));
361 h1 += h2;
362 h1 = h1 * 5 + 0x52dce729;
363
364 k2 *= c2;
365 k2 = (k2 << 33) | (k2 >> (64 - 33));
366 k2 *= c1;
367 h2 ^= k2;
368
369 h2 = (h2 << 31) | (h2 >> (64 - 31));
370 h2 += h1;
371 h2 = h2 * 5 + 0x38495ab5;
372 }
373
374 k1 = k2 = 0;
375 switch (nbytes & 15) {
376 case 15:
377 k2 ^= (uint64_t)(tail[14]) << 48;
378 case 14:
379 k2 ^= (uint64_t)(tail[13]) << 40;
380 case 13:
381 k2 ^= (uint64_t)(tail[12]) << 32;
382 case 12:
383 k2 ^= (uint64_t)(tail[11]) << 24;
384 case 11:
385 k2 ^= (uint64_t)(tail[10]) << 16;
386 case 10:
387 k2 ^= (uint64_t)(tail[9]) << 8;
388 case 9:
389 k2 ^= (uint64_t)(tail[8]) << 0;
390 k2 *= c2;
391 k2 = (k2 << 33) | (k2 >> (64 - 33));
392 k2 *= c1;
393 h2 ^= k2;
394
395 case 8:
396 k1 ^= (uint64_t)(tail[7]) << 56;
397 case 7:
398 k1 ^= (uint64_t)(tail[6]) << 48;
399 case 6:
400 k1 ^= (uint64_t)(tail[5]) << 40;
401 case 5:
402 k1 ^= (uint64_t)(tail[4]) << 32;
403 case 4:
404 k1 ^= (uint64_t)(tail[3]) << 24;
405 case 3:
406 k1 ^= (uint64_t)(tail[2]) << 16;
407 case 2:
408 k1 ^= (uint64_t)(tail[1]) << 8;
409 case 1:
410 k1 ^= (uint64_t)(tail[0]) << 0;
411 k1 *= c1;
412 k1 = (k1 << 31) | (k1 >> (64 - 31));
413 k1 *= c2;
414 h1 ^= k1;
415 };
416
417 //----------
418 // finalization
419
420 h1 ^= nbytes;
421 h2 ^= nbytes;
422
423 h1 += h2;
424 h2 += h1;
425
426 h1 ^= h1 >> 33;
427 h1 *= 0xff51afd7ed558ccdULL;
428 h1 ^= h1 >> 33;
429 h1 *= 0xc4ceb9fe1a85ec53ULL;
430 h1 ^= h1 >> 33;
431
432 h2 ^= h2 >> 33;
433 h2 *= 0xff51afd7ed558ccdULL;
434 h2 ^= h2 >> 33;
435 h2 *= 0xc4ceb9fe1a85ec53ULL;
436 h2 ^= h2 >> 33;
437
438 h1 += h2;
439 h2 += h1;
440
441 ((uint64_t *) retbuf)[0] = h1;
442 ((uint64_t *) retbuf)[1] = h2;
443
444 return true;
445}
bool qhashmd5(const void *data, size_t nbytes, void *retbuf)
Calculate 128-bit(16-bytes) MD5 hash.
Definition qhash.c:67
uint32_t qhashmurmur3_32(const void *data, size_t nbytes)
Get 32-bit Murmur3 hash.
Definition qhash.c:263
uint64_t qhashfnv1_64(const void *data, size_t nbytes)
Get 64-bit FNV1 hash integer.
Definition qhash.c:223
bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf)
Get 128-bit Murmur3 hash.
Definition qhash.c:335
uint32_t qhashfnv1_32(const void *data, size_t nbytes)
Get 32-bit FNV1 hash.
Definition qhash.c:192
bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes, void *retbuf)
Get 128-bit MD5 hash of a file contents.
Definition qhash.c:97