qLibc
qhash.c
Go to the documentation of this file.
1/******************************************************************************
2 * qLibc
3 *
4 * Copyright (c) 2010-2026 Seungyoung Kim.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29/**
30 * @file qhash.c Hash APIs.
31 */
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <stdbool.h>
36#include <stdint.h>
37#include <string.h>
38#include <unistd.h>
39#include <fcntl.h>
40#include <errno.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43#include "md5/md5.h"
44#include "qinternal.h"
45#include "utilities/qhash.h"
46
47/**
48 * Calculate 128-bit(16-bytes) MD5 hash.
49 *
50 * @param data source object
51 * @param nbytes size of data
52 * @param retbuf user buffer. It must be at leat 16-bytes long.
53 *
54 * @return true if successful, otherwise false.
55 *
56 * @code
57 * // get MD5
58 * unsigned char md5hash[16];
59 * qhashmd5((void*)"hello", 5, md5hash);
60 *
61 * // hex encode
62 * char *md5ascii = qhex_encode(md5hash, 16);
63 * printf("Hex encoded MD5: %s\n", md5ascii);
64 * free(md5ascii);
65 * @endcode
66 */
67bool qhashmd5(const void *data, size_t nbytes, void *retbuf) {
68 if (data == NULL || retbuf == NULL) {
69 errno = EINVAL;
70 return false;
71 }
72
73 MD5_CTX context;
74 MD5Init(&context);
75 MD5Update(&context, (unsigned char *) data, (unsigned int) nbytes);
76 MD5Final(retbuf, &context);
77
78 return true;
79}
80
81/**
82 * Get 128-bit MD5 hash of a file contents.
83 *
84 * @param filepath file path
85 * @param offset start offset. Set to 0 to digest from beginning of file.
86 * @param nbytes number of bytes to digest. Set to 0 to digest until end
87 * of file.
88 * @param retbuf user buffer. It must be at leat 16-bytes long.
89 *
90 * @return true if successful, otherwise false.
91 *
92 * @code
93 * unsigned char md5hash[16];
94 * qhashmd5_file("/tmp/test.dat", 0, 0, md5hash);
95 * @endcode
96 */
97bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes,
98 void *retbuf) {
99 if (filepath == NULL || offset < 0 || nbytes < 0 || retbuf == NULL) {
100 errno = EINVAL;
101 return false;
102 }
103
104 int fd = open(filepath, O_RDONLY, 0);
105 if (fd < 0)
106 return false;
107
108 struct stat st;
109 if (fstat(fd, &st) < 0) {
110 close(fd);
111 return false;
112 }
113 size_t size = st.st_size;
114
115 // check filesize
116 if (size < offset + nbytes) {
117 errno = EINVAL;
118 close(fd);
119 return false;
120 }
121
122 // if requested to digest until the end of file, set nbytes to the remaining size
123 if (nbytes == 0) {
124 nbytes = size - offset;
125 }
126
127 // move the seek pointer to the beginning of the offset
128 if (offset > 0) {
129 if (lseek(fd, offset, SEEK_SET) != offset) {
130 close(fd);
131 return false;
132 }
133 }
134
135 MD5_CTX context;
136 MD5Init(&context);
137 ssize_t toread, nread;
138 unsigned char buf[32 * 1024];
139 for (toread = nbytes; toread > 0; toread -= nread) {
140 if (toread > sizeof(buf))
141 nread = read(fd, buf, sizeof(buf));
142 else
143 nread = read(fd, buf, toread);
144 if (nread < 0)
145 break;
146 MD5Update(&context, buf, nread);
147 }
148 close(fd);
149 if (toread != 0)
150 return false;
151 MD5Final(retbuf, &context);
152
153 return true;
154}
155
156/**
157 * Get 32-bit FNV1 hash.
158 *
159 * @param data source data
160 * @param nbytes size of data
161 *
162 * @return 32-bit unsigned hash value.
163 *
164 * @code
165 * uint32_t hashval = qhashfnv1_32((void*)"hello", 5);
166 * @endcode
167 *
168 * @code
169 * Fowler/Noll/Vo hash
170 *
171 * The basis of this hash algorithm was taken from an idea sent as reviewer
172 * comments to the IEEE POSIX P1003.2 committee by:
173 *
174 * Phong Vo (http://www.research.att.com/info/kpv/)
175 * Glenn Fowler (http://www.research.att.com/~gsf/)
176 *
177 * In a subsequent ballot round:
178 *
179 * Landon Curt Noll (http://www.isthe.com/chongo/)
180 *
181 * improved on their algorithm. Some people tried this hash and found that
182 * it worked rather well. In an EMail message to Landon, they named it the
183 * ``Fowler/Noll/Vo'' or FNV hash.
184 *
185 * FNV hashes are designed to be fast while maintaining a low collision rate.
186 * The FNV speed allows one to quickly hash lots of data while maintaining
187 * a reasonable collision rate. See:
188 *
189 * http://www.isthe.com/chongo/tech/comp/fnv/index.html
190 *
191 * for more details as well as other forms of the FNV hash.
192 * @endcode
193 */
194uint32_t qhashfnv1_32(const void *data, size_t nbytes) {
195 if (data == NULL || nbytes == 0)
196 return 0;
197
198 unsigned char *dp;
199 uint32_t h = 0x811C9DC5;
200
201 for (dp = (unsigned char *) data; *dp && nbytes > 0; dp++, nbytes--) {
202#ifdef __GNUC__
203 h += (h<<1) + (h<<4) + (h<<7) + (h<<8) + (h<<24);
204#else
205 h *= 0x01000193;
206#endif
207 h ^= *dp;
208 }
209
210 return h;
211}
212
213/**
214 * Get 64-bit FNV1 hash integer.
215 *
216 * @param data source data
217 * @param nbytes size of data
218 *
219 * @return 64-bit unsigned hash value.
220 *
221 * @code
222 * uint64_t fnv64 = qhashfnv1_64((void*)"hello", 5);
223 * @endcode
224 */
225uint64_t qhashfnv1_64(const void *data, size_t nbytes) {
226 if (data == NULL || nbytes == 0)
227 return 0;
228
229 unsigned char *dp;
230 uint64_t h = 0xCBF29CE484222325ULL;
231
232 for (dp = (unsigned char *) data; *dp && nbytes > 0; dp++, nbytes--) {
233#ifdef __GNUC__
234 h += (h << 1) + (h << 4) + (h << 5) +
235 (h << 7) + (h << 8) + (h << 40);
236#else
237 h *= 0x100000001B3ULL;
238#endif
239 h ^= *dp;
240 }
241
242 return h;
243}
244
245/**
246 * Get 32-bit Murmur3 hash.
247 *
248 * @param data source data
249 * @param nbytes size of data
250 *
251 * @return 32-bit unsigned hash value.
252 *
253 * @code
254 * uint32_t hashval = qhashmurmur3_32((void*)"hello", 5);
255 * @endcode
256 *
257 * @code
258 * MurmurHash3 was created by Austin Appleby in 2008. The initial
259 * implementation was published in C++ and placed in the public.
260 * https://sites.google.com/site/murmurhash/
261 * Seungyoung Kim has ported its implementation into C language
262 * in 2012 and published it as a part of qLibc component.
263 * @endcode
264 */
265uint32_t qhashmurmur3_32(const void *data, size_t nbytes) {
266 if (data == NULL || nbytes == 0)
267 return 0;
268
269 const uint32_t c1 = 0xcc9e2d51;
270 const uint32_t c2 = 0x1b873593;
271
272 const int nblocks = nbytes / 4;
273 const uint32_t *blocks = (const uint32_t *) (data);
274 const uint8_t *tail = (const uint8_t *) (data + (nblocks * 4));
275
276 uint32_t h = 0;
277
278 int i;
279 uint32_t k;
280 for (i = 0; i < nblocks; i++) {
281 k = blocks[i];
282
283 k *= c1;
284 k = (k << 15) | (k >> (32 - 15));
285 k *= c2;
286
287 h ^= k;
288 h = (h << 13) | (h >> (32 - 13));
289 h = (h * 5) + 0xe6546b64;
290 }
291
292 k = 0;
293 switch (nbytes & 3) {
294 case 3:
295 k ^= tail[2] << 16;
296 case 2:
297 k ^= tail[1] << 8;
298 case 1:
299 k ^= tail[0];
300 k *= c1;
301 k = (k << 15) | (k >> (32 - 15));
302 k *= c2;
303 h ^= k;
304 };
305
306 h ^= nbytes;
307
308 h ^= h >> 16;
309 h *= 0x85ebca6b;
310 h ^= h >> 13;
311 h *= 0xc2b2ae35;
312 h ^= h >> 16;
313
314 return h;
315}
316
317/**
318 * Get 128-bit Murmur3 hash.
319 *
320 * @param data source data
321 * @param nbytes size of data
322 * @param retbuf user buffer. It must be at leat 16-bytes long.
323 *
324 * @return true if successful, otherwise false.
325 *
326 * @code
327 * // get 128-bit Murmur3 hash.
328 * unsigned char hash[16];
329 * qhashmurmur3_128((void*)"hello", 5, hash);
330 *
331 * // hex encode
332 * char *ascii = qhex_encode(hash, 16);
333 * printf("Hex encoded Murmur3: %s\n", ascii);
334 * free(ascii);
335 * @endcode
336 */
337bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf) {
338 if (data == NULL || nbytes == 0)
339 return false;
340
341 const uint64_t c1 = 0x87c37b91114253d5ULL;
342 const uint64_t c2 = 0x4cf5ad432745937fULL;
343
344 const int nblocks = nbytes / 16;
345 const uint64_t *blocks = (const uint64_t *) (data);
346 const uint8_t *tail = (const uint8_t *) (data + (nblocks * 16));
347
348 uint64_t h1 = 0;
349 uint64_t h2 = 0;
350
351 int i;
352 uint64_t k1, k2;
353 for (i = 0; i < nblocks; i++) {
354 k1 = blocks[i * 2 + 0];
355 k2 = blocks[i * 2 + 1];
356
357 k1 *= c1;
358 k1 = (k1 << 31) | (k1 >> (64 - 31));
359 k1 *= c2;
360 h1 ^= k1;
361
362 h1 = (h1 << 27) | (h1 >> (64 - 27));
363 h1 += h2;
364 h1 = h1 * 5 + 0x52dce729;
365
366 k2 *= c2;
367 k2 = (k2 << 33) | (k2 >> (64 - 33));
368 k2 *= c1;
369 h2 ^= k2;
370
371 h2 = (h2 << 31) | (h2 >> (64 - 31));
372 h2 += h1;
373 h2 = h2 * 5 + 0x38495ab5;
374 }
375
376 k1 = k2 = 0;
377 switch (nbytes & 15) {
378 case 15:
379 k2 ^= (uint64_t)(tail[14]) << 48;
380 case 14:
381 k2 ^= (uint64_t)(tail[13]) << 40;
382 case 13:
383 k2 ^= (uint64_t)(tail[12]) << 32;
384 case 12:
385 k2 ^= (uint64_t)(tail[11]) << 24;
386 case 11:
387 k2 ^= (uint64_t)(tail[10]) << 16;
388 case 10:
389 k2 ^= (uint64_t)(tail[9]) << 8;
390 case 9:
391 k2 ^= (uint64_t)(tail[8]) << 0;
392 k2 *= c2;
393 k2 = (k2 << 33) | (k2 >> (64 - 33));
394 k2 *= c1;
395 h2 ^= k2;
396
397 case 8:
398 k1 ^= (uint64_t)(tail[7]) << 56;
399 case 7:
400 k1 ^= (uint64_t)(tail[6]) << 48;
401 case 6:
402 k1 ^= (uint64_t)(tail[5]) << 40;
403 case 5:
404 k1 ^= (uint64_t)(tail[4]) << 32;
405 case 4:
406 k1 ^= (uint64_t)(tail[3]) << 24;
407 case 3:
408 k1 ^= (uint64_t)(tail[2]) << 16;
409 case 2:
410 k1 ^= (uint64_t)(tail[1]) << 8;
411 case 1:
412 k1 ^= (uint64_t)(tail[0]) << 0;
413 k1 *= c1;
414 k1 = (k1 << 31) | (k1 >> (64 - 31));
415 k1 *= c2;
416 h1 ^= k1;
417 };
418
419 //----------
420 // finalization
421
422 h1 ^= nbytes;
423 h2 ^= nbytes;
424
425 h1 += h2;
426 h2 += h1;
427
428 h1 ^= h1 >> 33;
429 h1 *= 0xff51afd7ed558ccdULL;
430 h1 ^= h1 >> 33;
431 h1 *= 0xc4ceb9fe1a85ec53ULL;
432 h1 ^= h1 >> 33;
433
434 h2 ^= h2 >> 33;
435 h2 *= 0xff51afd7ed558ccdULL;
436 h2 ^= h2 >> 33;
437 h2 *= 0xc4ceb9fe1a85ec53ULL;
438 h2 ^= h2 >> 33;
439
440 h1 += h2;
441 h2 += h1;
442
443 ((uint64_t *) retbuf)[0] = h1;
444 ((uint64_t *) retbuf)[1] = h2;
445
446 return true;
447}
bool qhashmd5(const void *data, size_t nbytes, void *retbuf)
Calculate 128-bit(16-bytes) MD5 hash.
Definition qhash.c:67
uint32_t qhashmurmur3_32(const void *data, size_t nbytes)
Get 32-bit Murmur3 hash.
Definition qhash.c:265
uint64_t qhashfnv1_64(const void *data, size_t nbytes)
Get 64-bit FNV1 hash integer.
Definition qhash.c:225
bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf)
Get 128-bit Murmur3 hash.
Definition qhash.c:337
uint32_t qhashfnv1_32(const void *data, size_t nbytes)
Get 32-bit FNV1 hash.
Definition qhash.c:194
bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes, void *retbuf)
Get 128-bit MD5 hash of a file contents.
Definition qhash.c:97