Mercurial > hg
annotate contrib/python-zstandard/zstd/dictBuilder/zdict.c @ 51991:f0bee30af890 default tip
tests: replace inline `waitlock()` with `wait-on-file` script
The latter scales up the timeout based on the timeout value provided to the test
runner, and I was seeing timeouts on Windows when running all of the tests using
all CPU cores.
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Tue, 08 Oct 2024 01:06:57 -0400 |
parents | de7838053207 |
children |
rev | line source |
---|---|
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1 /* |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
3 * All rights reserved. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
4 * |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
5 * This source code is licensed under both the BSD-style license (found in the |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
7 * in the COPYING file in the root directory of this source tree). |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
8 * You may select, at your option, one of the above-listed licenses. |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
9 */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
10 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
11 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
12 /*-************************************** |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
13 * Tuning parameters |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
14 ****************************************/ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
15 #define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
16 #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20) |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
17 #define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
18 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
19 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
20 /*-************************************** |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
21 * Compiler Options |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
22 ****************************************/ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
23 /* Unix Large Files support (>4GB) */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
24 #define _FILE_OFFSET_BITS 64 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
25 #if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
26 # define _LARGEFILE_SOURCE |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
27 #elif ! defined(__LP64__) /* No point defining Large file for 64 bit */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
28 # define _LARGEFILE64_SOURCE |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
29 #endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
30 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
31 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
32 /*-************************************* |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
33 * Dependencies |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
34 ***************************************/ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
35 #include <stdlib.h> /* malloc, free */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
36 #include <string.h> /* memset */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
37 #include <stdio.h> /* fprintf, fopen, ftello64 */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
38 #include <time.h> /* clock */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
39 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
40 #include "mem.h" /* read */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
41 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
42 #define HUF_STATIC_LINKING_ONLY |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
43 #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
44 #include "zstd_internal.h" /* includes zstd.h */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
45 #include "xxhash.h" /* XXH64 */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
46 #include "divsufsort.h" |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
47 #ifndef ZDICT_STATIC_LINKING_ONLY |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
48 # define ZDICT_STATIC_LINKING_ONLY |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
49 #endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
50 #include "zdict.h" |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
51 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
52 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
53 /*-************************************* |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
54 * Constants |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
55 ***************************************/ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
56 #define KB *(1 <<10) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
57 #define MB *(1 <<20) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
58 #define GB *(1U<<30) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
59 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
60 #define DICTLISTSIZE_DEFAULT 10000 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
61 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
62 #define NOISELENGTH 32 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
63 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
64 static const int g_compressionLevel_default = 3; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
65 static const U32 g_selectivity_default = 9; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
66 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
67 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
68 /*-************************************* |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
69 * Console display |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
70 ***************************************/ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
71 #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
72 #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
73 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
74 static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
75 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
76 static void ZDICT_printHex(const void* ptr, size_t length) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
77 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
78 const BYTE* const b = (const BYTE*)ptr; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
79 size_t u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
80 for (u=0; u<length; u++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
81 BYTE c = b[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
82 if (c<32 || c>126) c = '.'; /* non-printable char */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
83 DISPLAY("%c", c); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
84 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
85 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
86 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
87 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
88 /*-******************************************************** |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
89 * Helper functions |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
90 **********************************************************/ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
91 unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
92 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
93 const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
94 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
95 unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
96 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
97 if (dictSize < 8) return 0; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
98 if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
99 return MEM_readLE32((const char*)dictBuffer + 4); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
100 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
101 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
102 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
103 /*-******************************************************** |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
104 * Dictionary training functions |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
105 **********************************************************/ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
106 static unsigned ZDICT_NbCommonBytes (size_t val) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
107 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
108 if (MEM_isLittleEndian()) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
109 if (MEM_64bits()) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
110 # if defined(_MSC_VER) && defined(_WIN64) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
111 unsigned long r = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
112 _BitScanForward64( &r, (U64)val ); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
113 return (unsigned)(r>>3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
114 # elif defined(__GNUC__) && (__GNUC__ >= 3) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
115 return (__builtin_ctzll((U64)val) >> 3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
116 # else |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
117 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
118 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
119 # endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
120 } else { /* 32 bits */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
121 # if defined(_MSC_VER) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
122 unsigned long r=0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
123 _BitScanForward( &r, (U32)val ); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
124 return (unsigned)(r>>3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
125 # elif defined(__GNUC__) && (__GNUC__ >= 3) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
126 return (__builtin_ctz((U32)val) >> 3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
127 # else |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
128 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
129 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
130 # endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
131 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
132 } else { /* Big Endian CPU */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
133 if (MEM_64bits()) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
134 # if defined(_MSC_VER) && defined(_WIN64) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
135 unsigned long r = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
136 _BitScanReverse64( &r, val ); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
137 return (unsigned)(r>>3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
138 # elif defined(__GNUC__) && (__GNUC__ >= 3) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
139 return (__builtin_clzll(val) >> 3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
140 # else |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
141 unsigned r; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
142 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
143 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
144 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
145 r += (!val); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
146 return r; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
147 # endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
148 } else { /* 32 bits */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
149 # if defined(_MSC_VER) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
150 unsigned long r = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
151 _BitScanReverse( &r, (unsigned long)val ); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
152 return (unsigned)(r>>3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
153 # elif defined(__GNUC__) && (__GNUC__ >= 3) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
154 return (__builtin_clz((U32)val) >> 3); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
155 # else |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
156 unsigned r; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
157 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
158 r += (!val); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
159 return r; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
160 # endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
161 } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
162 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
163 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
164 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
165 /*! ZDICT_count() : |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
166 Count the nb of common bytes between 2 pointers. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
167 Note : this function presumes end of buffer followed by noisy guard band. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
168 */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
169 static size_t ZDICT_count(const void* pIn, const void* pMatch) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
170 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
171 const char* const pStart = (const char*)pIn; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
172 for (;;) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
173 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
174 if (!diff) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
175 pIn = (const char*)pIn+sizeof(size_t); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
176 pMatch = (const char*)pMatch+sizeof(size_t); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
177 continue; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
178 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
179 pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
180 return (size_t)((const char*)pIn - pStart); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
181 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
182 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
183 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
184 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
185 typedef struct { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
186 U32 pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
187 U32 length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
188 U32 savings; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
189 } dictItem; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
190 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
191 static void ZDICT_initDictItem(dictItem* d) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
192 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
193 d->pos = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
194 d->length = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
195 d->savings = (U32)(-1); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
196 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
197 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
198 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
199 #define LLIMIT 64 /* heuristic determined experimentally */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
200 #define MINMATCHLENGTH 7 /* heuristic determined experimentally */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
201 static dictItem ZDICT_analyzePos( |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
202 BYTE* doneMarks, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
203 const int* suffix, U32 start, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
204 const void* buffer, U32 minRatio, U32 notificationLevel) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
205 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
206 U32 lengthList[LLIMIT] = {0}; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
207 U32 cumulLength[LLIMIT] = {0}; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
208 U32 savings[LLIMIT] = {0}; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
209 const BYTE* b = (const BYTE*)buffer; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
210 size_t maxLength = LLIMIT; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
211 size_t pos = suffix[start]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
212 U32 end = start; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
213 dictItem solution; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
214 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
215 /* init */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
216 memset(&solution, 0, sizeof(solution)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
217 doneMarks[pos] = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
218 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
219 /* trivial repetition cases */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
220 if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
221 ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
222 ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
223 /* skip and mark segment */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
224 U16 const pattern16 = MEM_read16(b+pos+4); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
225 U32 u, patternEnd = 6; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
226 while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
227 if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
228 for (u=1; u<patternEnd; u++) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
229 doneMarks[pos+u] = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
230 return solution; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
231 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
232 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
233 /* look forward */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
234 { size_t length; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
235 do { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
236 end++; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
237 length = ZDICT_count(b + pos, b + suffix[end]); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
238 } while (length >= MINMATCHLENGTH); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
239 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
240 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
241 /* look backward */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
242 { size_t length; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
243 do { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
244 length = ZDICT_count(b + pos, b + *(suffix+start-1)); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
245 if (length >=MINMATCHLENGTH) start--; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
246 } while(length >= MINMATCHLENGTH); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
247 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
248 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
249 /* exit if not found a minimum nb of repetitions */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
250 if (end-start < minRatio) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
251 U32 idx; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
252 for(idx=start; idx<end; idx++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
253 doneMarks[suffix[idx]] = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
254 return solution; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
255 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
256 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
257 { int i; |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
258 U32 mml; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
259 U32 refinedStart = start; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
260 U32 refinedEnd = end; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
261 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
262 DISPLAYLEVEL(4, "\n"); |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
263 DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
264 DISPLAYLEVEL(4, "\n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
265 |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
266 for (mml = MINMATCHLENGTH ; ; mml++) { |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
267 BYTE currentChar = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
268 U32 currentCount = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
269 U32 currentID = refinedStart; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
270 U32 id; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
271 U32 selectedCount = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
272 U32 selectedID = currentID; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
273 for (id =refinedStart; id < refinedEnd; id++) { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
274 if (b[suffix[id] + mml] != currentChar) { |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
275 if (currentCount > selectedCount) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
276 selectedCount = currentCount; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
277 selectedID = currentID; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
278 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
279 currentID = id; |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
280 currentChar = b[ suffix[id] + mml]; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
281 currentCount = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
282 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
283 currentCount ++; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
284 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
285 if (currentCount > selectedCount) { /* for last */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
286 selectedCount = currentCount; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
287 selectedID = currentID; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
288 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
289 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
290 if (selectedCount < minRatio) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
291 break; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
292 refinedStart = selectedID; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
293 refinedEnd = refinedStart + selectedCount; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
294 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
295 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
296 /* evaluate gain based on new dict */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
297 start = refinedStart; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
298 pos = suffix[refinedStart]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
299 end = start; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
300 memset(lengthList, 0, sizeof(lengthList)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
301 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
302 /* look forward */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
303 { size_t length; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
304 do { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
305 end++; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
306 length = ZDICT_count(b + pos, b + suffix[end]); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
307 if (length >= LLIMIT) length = LLIMIT-1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
308 lengthList[length]++; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
309 } while (length >=MINMATCHLENGTH); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
310 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
311 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
312 /* look backward */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
313 { size_t length = MINMATCHLENGTH; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
314 while ((length >= MINMATCHLENGTH) & (start > 0)) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
315 length = ZDICT_count(b + pos, b + suffix[start - 1]); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
316 if (length >= LLIMIT) length = LLIMIT - 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
317 lengthList[length]++; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
318 if (length >= MINMATCHLENGTH) start--; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
319 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
320 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
321 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
322 /* largest useful length */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
323 memset(cumulLength, 0, sizeof(cumulLength)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
324 cumulLength[maxLength-1] = lengthList[maxLength-1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
325 for (i=(int)(maxLength-2); i>=0; i--) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
326 cumulLength[i] = cumulLength[i+1] + lengthList[i]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
327 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
328 for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
329 maxLength = i; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
330 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
331 /* reduce maxLength in case of final into repetitive data */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
332 { U32 l = (U32)maxLength; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
333 BYTE const c = b[pos + maxLength-1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
334 while (b[pos+l-2]==c) l--; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
335 maxLength = l; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
336 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
337 if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
338 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
339 /* calculate savings */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
340 savings[5] = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
341 for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
342 savings[i] = savings[i-1] + (lengthList[i] * (i-3)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
343 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
344 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
345 (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
346 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
347 solution.pos = (U32)pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
348 solution.length = (U32)maxLength; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
349 solution.savings = savings[maxLength]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
350 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
351 /* mark positions done */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
352 { U32 id; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
353 for (id=start; id<end; id++) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
354 U32 p, pEnd, length; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
355 U32 const testedPos = suffix[id]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
356 if (testedPos == pos) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
357 length = solution.length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
358 else { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
359 length = (U32)ZDICT_count(b+pos, b+testedPos); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
360 if (length > solution.length) length = solution.length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
361 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
362 pEnd = (U32)(testedPos + length); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
363 for (p=testedPos; p<pEnd; p++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
364 doneMarks[p] = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
365 } } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
366 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
367 return solution; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
368 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
369 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
370 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
371 static int isIncluded(const void* in, const void* container, size_t length) |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
372 { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
373 const char* const ip = (const char*) in; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
374 const char* const into = (const char*) container; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
375 size_t u; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
376 |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
377 for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
378 if (ip[u] != into[u]) break; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
379 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
380 |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
381 return u==length; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
382 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
383 |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
384 /*! ZDICT_tryMerge() : |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
385 check if dictItem can be merged, do it if possible |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
386 @return : id of destination elt, 0 if not merged |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
387 */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
388 static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
389 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
390 const U32 tableSize = table->pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
391 const U32 eltEnd = elt.pos + elt.length; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
392 const char* const buf = (const char*) buffer; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
393 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
394 /* tail overlap */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
395 U32 u; for (u=1; u<tableSize; u++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
396 if (u==eltNbToSkip) continue; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
397 if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
398 /* append */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
399 U32 const addedLength = table[u].pos - elt.pos; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
400 table[u].length += addedLength; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
401 table[u].pos = elt.pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
402 table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
403 table[u].savings += elt.length / 8; /* rough approx bonus */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
404 elt = table[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
405 /* sort : improve rank */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
406 while ((u>1) && (table[u-1].savings < elt.savings)) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
407 table[u] = table[u-1], u--; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
408 table[u] = elt; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
409 return u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
410 } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
411 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
412 /* front overlap */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
413 for (u=1; u<tableSize; u++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
414 if (u==eltNbToSkip) continue; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
415 |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
416 if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
417 /* append */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
418 int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
419 table[u].savings += elt.length / 8; /* rough approx bonus */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
420 if (addedLength > 0) { /* otherwise, elt fully included into existing */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
421 table[u].length += addedLength; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
422 table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
423 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
424 /* sort : improve rank */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
425 elt = table[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
426 while ((u>1) && (table[u-1].savings < elt.savings)) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
427 table[u] = table[u-1], u--; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
428 table[u] = elt; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
429 return u; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
430 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
431 |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
432 if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
433 if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
434 size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
435 table[u].pos = elt.pos; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
436 table[u].savings += (U32)(elt.savings * addedLength / elt.length); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
437 table[u].length = MIN(elt.length, table[u].length + 1); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
438 return u; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
439 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
440 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
441 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
442 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
443 return 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
444 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
445 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
446 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
447 static void ZDICT_removeDictItem(dictItem* table, U32 id) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
448 { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
449 /* convention : table[0].pos stores nb of elts */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
450 U32 const max = table[0].pos; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
451 U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
452 if (!id) return; /* protection, should never happen */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
453 for (u=id; u<max-1; u++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
454 table[u] = table[u+1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
455 table->pos--; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
456 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
457 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
458 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
459 static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
460 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
461 /* merge if possible */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
462 U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
463 if (mergeId) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
464 U32 newMerge = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
465 while (newMerge) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
466 newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
467 if (newMerge) ZDICT_removeDictItem(table, mergeId); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
468 mergeId = newMerge; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
469 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
470 return; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
471 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
472 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
473 /* insert */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
474 { U32 current; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
475 U32 nextElt = table->pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
476 if (nextElt >= maxSize) nextElt = maxSize-1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
477 current = nextElt-1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
478 while (table[current].savings < elt.savings) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
479 table[current+1] = table[current]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
480 current--; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
481 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
482 table[current+1] = elt; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
483 table->pos = nextElt+1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
484 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
485 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
486 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
487 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
488 static U32 ZDICT_dictSize(const dictItem* dictList) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
489 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
490 U32 u, dictSize = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
491 for (u=1; u<dictList[0].pos; u++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
492 dictSize += dictList[u].length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
493 return dictSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
494 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
495 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
496 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
497 static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
498 const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
499 const size_t* fileSizes, unsigned nbFiles, |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
500 unsigned minRatio, U32 notificationLevel) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
501 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
502 int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
503 int* const suffix = suffix0+1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
504 U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
505 BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
506 U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
507 size_t result = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
508 clock_t displayClock = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
509 clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
510 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
511 # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
512 if (ZDICT_clockSpan(displayClock) > refreshRate) \ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
513 { displayClock = clock(); DISPLAY(__VA_ARGS__); \ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
514 if (notificationLevel>=4) fflush(stderr); } } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
515 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
516 /* init */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
517 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
518 if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
519 result = ERROR(memory_allocation); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
520 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
521 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
522 if (minRatio < MINRATIO) minRatio = MINRATIO; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
523 memset(doneMarks, 0, bufferSize+16); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
524 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
525 /* limit sample set size (divsufsort limitation)*/ |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
526 if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20)); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
527 while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
528 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
529 /* sort */ |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
530 DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20)); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
531 { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
532 if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
533 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
534 suffix[bufferSize] = (int)bufferSize; /* leads into noise */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
535 suffix0[0] = (int)bufferSize; /* leads into noise */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
536 /* build reverse suffix sort */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
537 { size_t pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
538 for (pos=0; pos < bufferSize; pos++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
539 reverseSuffix[suffix[pos]] = (U32)pos; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
540 /* note filePos tracks borders between samples. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
541 It's not used at this stage, but planned to become useful in a later update */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
542 filePos[0] = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
543 for (pos=1; pos<nbFiles; pos++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
544 filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
545 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
546 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
547 DISPLAYLEVEL(2, "finding patterns ... \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
548 DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
549 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
550 { U32 cursor; for (cursor=0; cursor < bufferSize; ) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
551 dictItem solution; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
552 if (doneMarks[cursor]) { cursor++; continue; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
553 solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
554 if (solution.length==0) { cursor++; continue; } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
555 ZDICT_insertDictItem(dictList, dictListSize, solution, buffer); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
556 cursor += solution.length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
557 DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
558 } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
559 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
560 _cleanup: |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
561 free(suffix0); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
562 free(reverseSuffix); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
563 free(doneMarks); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
564 free(filePos); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
565 return result; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
566 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
567 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
568 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
569 static void ZDICT_fillNoise(void* buffer, size_t length) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
570 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
571 unsigned const prime1 = 2654435761U; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
572 unsigned const prime2 = 2246822519U; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
573 unsigned acc = prime1; |
43994
de7838053207
zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42937
diff
changeset
|
574 size_t p=0; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
575 for (p=0; p<length; p++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
576 acc *= prime2; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
577 ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
578 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
579 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
580 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
581 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
582 typedef struct |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
583 { |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
584 ZSTD_CDict* dict; /* dictionary */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
585 ZSTD_CCtx* zc; /* working context */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
586 void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
587 } EStats_ress_t; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
588 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
589 #define MAXREPOFFSET 1024 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
590 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
591 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
592 unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
593 const void* src, size_t srcSize, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
594 U32 notificationLevel) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
595 { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
596 size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
597 size_t cSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
598 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
599 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
600 { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
601 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
602 |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
603 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
604 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
605 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
606 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
607 if (cSize) { /* if == 0; block is not compressible */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
608 const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
609 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
610 /* literals stats */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
611 { const BYTE* bytePtr; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
612 for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
613 countLit[*bytePtr]++; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
614 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
615 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
616 /* seqStats */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
617 { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
618 ZSTD_seqToCodes(seqStorePtr); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
619 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
620 { const BYTE* codePtr = seqStorePtr->ofCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
621 U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
622 for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
623 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
624 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
625 { const BYTE* codePtr = seqStorePtr->mlCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
626 U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
627 for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
628 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
629 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
630 { const BYTE* codePtr = seqStorePtr->llCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
631 U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
632 for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
633 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
634 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
635 if (nbSeq >= 2) { /* rep offsets */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
636 const seqDef* const seq = seqStorePtr->sequencesStart; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
637 U32 offset1 = seq[0].offset - 3; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
638 U32 offset2 = seq[1].offset - 3; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
639 if (offset1 >= MAXREPOFFSET) offset1 = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
640 if (offset2 >= MAXREPOFFSET) offset2 = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
641 repOffsets[offset1] += 3; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
642 repOffsets[offset2] += 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
643 } } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
644 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
645 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
646 static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
647 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
648 size_t total=0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
649 unsigned u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
650 for (u=0; u<nbFiles; u++) total += fileSizes[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
651 return total; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
652 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
653 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
654 typedef struct { U32 offset; U32 count; } offsetCount_t; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
655 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
656 static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
657 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
658 U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
659 table[ZSTD_REP_NUM].offset = val; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
660 table[ZSTD_REP_NUM].count = count; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
661 for (u=ZSTD_REP_NUM; u>0; u--) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
662 offsetCount_t tmp; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
663 if (table[u-1].count >= table[u].count) break; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
664 tmp = table[u-1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
665 table[u-1] = table[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
666 table[u] = tmp; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
667 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
668 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
669 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
670 /* ZDICT_flatLit() : |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
671 * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
672 * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
673 */ |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
674 static void ZDICT_flatLit(unsigned* countLit) |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
675 { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
676 int u; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
677 for (u=1; u<256; u++) countLit[u] = 2; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
678 countLit[0] = 4; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
679 countLit[253] = 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
680 countLit[254] = 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
681 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
682 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
683 #define OFFCODE_MAX 30 /* only applicable to first block */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
684 static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
685 unsigned compressionLevel, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
686 const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
687 const void* dictBuffer, size_t dictBufferSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
688 unsigned notificationLevel) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
689 { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
690 unsigned countLit[256]; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
691 HUF_CREATE_STATIC_CTABLE(hufTable, 255); |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
692 unsigned offcodeCount[OFFCODE_MAX+1]; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
693 short offcodeNCount[OFFCODE_MAX+1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
694 U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
695 unsigned matchLengthCount[MaxML+1]; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
696 short matchLengthNCount[MaxML+1]; |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
697 unsigned litLengthCount[MaxLL+1]; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
698 short litLengthNCount[MaxLL+1]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
699 U32 repOffset[MAXREPOFFSET]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
700 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
701 EStats_ress_t esr = { NULL, NULL, NULL }; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
702 ZSTD_parameters params; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
703 U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
704 size_t pos = 0, errorCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
705 size_t eSize = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
706 size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
707 size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
708 BYTE* dstPtr = (BYTE*)dstBuffer; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
709 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
710 /* init */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
711 DEBUGLOG(4, "ZDICT_analyzeEntropy"); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
712 if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
713 for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
714 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
715 for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
716 for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
717 memset(repOffset, 0, sizeof(repOffset)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
718 repOffset[1] = repOffset[4] = repOffset[8] = 1; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
719 memset(bestRepOffset, 0, sizeof(bestRepOffset)); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
720 if (compressionLevel==0) compressionLevel = g_compressionLevel_default; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
721 params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
722 |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
723 esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
724 esr.zc = ZSTD_createCCtx(); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
725 esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
726 if (!esr.dict || !esr.zc || !esr.workPlace) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
727 eSize = ERROR(memory_allocation); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
728 DISPLAYLEVEL(1, "Not enough memory \n"); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
729 goto _cleanup; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
730 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
731 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
732 /* collect stats on all samples */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
733 for (u=0; u<nbFiles; u++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
734 ZDICT_countEStats(esr, params, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
735 countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
736 (const char*)srcBuffer + pos, fileSizes[u], |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
737 notificationLevel); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
738 pos += fileSizes[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
739 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
740 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
741 /* analyze, build stats, starting with literals */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
742 { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
743 if (HUF_isError(maxNbBits)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
744 eSize = maxNbBits; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
745 DISPLAYLEVEL(1, " HUF_buildCTable error \n"); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
746 goto _cleanup; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
747 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
748 if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
749 DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n"); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
750 ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
751 maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
752 assert(maxNbBits==9); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
753 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
754 huffLog = (U32)maxNbBits; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
755 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
756 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
757 /* looking for most common first offsets */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
758 { U32 offset; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
759 for (offset=1; offset<MAXREPOFFSET; offset++) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
760 ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
761 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
762 /* note : the result of this phase should be used to better appreciate the impact on statistics */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
763 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
764 total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
765 errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
766 if (FSE_isError(errorCode)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
767 eSize = errorCode; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
768 DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
769 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
770 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
771 Offlog = (U32)errorCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
772 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
773 total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
774 errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
775 if (FSE_isError(errorCode)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
776 eSize = errorCode; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
777 DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
778 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
779 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
780 mlLog = (U32)errorCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
781 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
782 total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
783 errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
784 if (FSE_isError(errorCode)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
785 eSize = errorCode; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
786 DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
787 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
788 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
789 llLog = (U32)errorCode; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
790 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
791 /* write result to buffer */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
792 { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
793 if (HUF_isError(hhSize)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
794 eSize = hhSize; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
795 DISPLAYLEVEL(1, "HUF_writeCTable error \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
796 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
797 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
798 dstPtr += hhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
799 maxDstSize -= hhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
800 eSize += hhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
801 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
802 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
803 { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
804 if (FSE_isError(ohSize)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
805 eSize = ohSize; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
806 DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
807 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
808 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
809 dstPtr += ohSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
810 maxDstSize -= ohSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
811 eSize += ohSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
812 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
813 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
814 { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
815 if (FSE_isError(mhSize)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
816 eSize = mhSize; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
817 DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
818 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
819 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
820 dstPtr += mhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
821 maxDstSize -= mhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
822 eSize += mhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
823 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
824 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
825 { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
826 if (FSE_isError(lhSize)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
827 eSize = lhSize; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
828 DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
829 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
830 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
831 dstPtr += lhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
832 maxDstSize -= lhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
833 eSize += lhSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
834 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
835 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
836 if (maxDstSize<12) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
837 eSize = ERROR(dstSize_tooSmall); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
838 DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
839 goto _cleanup; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
840 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
841 # if 0 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
842 MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
843 MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
844 MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
845 #else |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
846 /* at this stage, we don't use the result of "most common first offset", |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
847 as the impact of statistics is not properly evaluated */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
848 MEM_writeLE32(dstPtr+0, repStartValue[0]); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
849 MEM_writeLE32(dstPtr+4, repStartValue[1]); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
850 MEM_writeLE32(dstPtr+8, repStartValue[2]); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
851 #endif |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
852 eSize += 12; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
853 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
854 _cleanup: |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
855 ZSTD_freeCDict(esr.dict); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
856 ZSTD_freeCCtx(esr.zc); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
857 free(esr.workPlace); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
858 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
859 return eSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
860 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
861 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
862 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
863 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
864 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
865 const void* customDictContent, size_t dictContentSize, |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
866 const void* samplesBuffer, const size_t* samplesSizes, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
867 unsigned nbSamples, ZDICT_params_t params) |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
868 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
869 size_t hSize; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
870 #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
871 BYTE header[HBUFFSIZE]; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
872 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
873 U32 const notificationLevel = params.notificationLevel; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
874 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
875 /* check conditions */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
876 DEBUGLOG(4, "ZDICT_finalizeDictionary"); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
877 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
878 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
879 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
880 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
881 /* dictionary header */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
882 MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
883 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
884 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
885 U32 const dictID = params.dictID ? params.dictID : compliantID; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
886 MEM_writeLE32(header+4, dictID); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
887 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
888 hSize = 8; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
889 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
890 /* entropy tables */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
891 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
892 DISPLAYLEVEL(2, "statistics ... \n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
893 { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
894 compressionLevel, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
895 samplesBuffer, samplesSizes, nbSamples, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
896 customDictContent, dictContentSize, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
897 notificationLevel); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
898 if (ZDICT_isError(eSize)) return eSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
899 hSize += eSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
900 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
901 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
902 /* copy elements in final buffer ; note : src and dst buffer can overlap */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
903 if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
904 { size_t const dictSize = hSize + dictContentSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
905 char* dictEnd = (char*)dictBuffer + dictSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
906 memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
907 memcpy(dictBuffer, header, hSize); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
908 return dictSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
909 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
910 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
911 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30822
diff
changeset
|
912 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
913 static size_t ZDICT_addEntropyTablesFromBuffer_advanced( |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
914 void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
915 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
916 ZDICT_params_t params) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
917 { |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
918 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
919 U32 const notificationLevel = params.notificationLevel; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
920 size_t hSize = 8; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
921 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
922 /* calculate entropy tables */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
923 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
924 DISPLAYLEVEL(2, "statistics ... \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
925 { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
926 compressionLevel, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
927 samplesBuffer, samplesSizes, nbSamples, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
928 (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
929 notificationLevel); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
930 if (ZDICT_isError(eSize)) return eSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
931 hSize += eSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
932 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
933 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
934 /* add dictionary header (after entropy tables) */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
935 MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
936 { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
937 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
938 U32 const dictID = params.dictID ? params.dictID : compliantID; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
939 MEM_writeLE32((char*)dictBuffer+4, dictID); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
940 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
941 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
942 if (hSize + dictContentSize < dictBufferCapacity) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
943 memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
944 return MIN(dictBufferCapacity, hSize+dictContentSize); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
945 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
946 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
947 /* Hidden declaration for dbio.c */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
948 size_t ZDICT_trainFromBuffer_unsafe_legacy( |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
949 void* dictBuffer, size_t maxDictSize, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
950 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
951 ZDICT_legacy_params_t params); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
952 /*! ZDICT_trainFromBuffer_unsafe_legacy() : |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
953 * Warning : `samplesBuffer` must be followed by noisy guard band. |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
954 * @return : size of dictionary, or an error code which can be tested with ZDICT_isError() |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
955 */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
956 size_t ZDICT_trainFromBuffer_unsafe_legacy( |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
957 void* dictBuffer, size_t maxDictSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
958 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
959 ZDICT_legacy_params_t params) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
960 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
961 U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
962 dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
963 unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
964 unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
965 size_t const targetDictSize = maxDictSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
966 size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
967 size_t dictSize = 0; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
968 U32 const notificationLevel = params.zParams.notificationLevel; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
969 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
970 /* checks */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
971 if (!dictList) return ERROR(memory_allocation); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
972 if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
973 if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
974 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
975 /* init */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
976 ZDICT_initDictItem(dictList); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
977 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
978 /* build dictionary */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
979 ZDICT_trainBuffer_legacy(dictList, dictListSize, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
980 samplesBuffer, samplesBuffSize, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
981 samplesSizes, nbSamples, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
982 minRep, notificationLevel); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
983 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
984 /* display best matches */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
985 if (params.zParams.notificationLevel>= 3) { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
986 unsigned const nb = MIN(25, dictList[0].pos); |
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
987 unsigned const dictContentSize = ZDICT_dictSize(dictList); |
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
988 unsigned u; |
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
989 DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); |
30822
b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30434
diff
changeset
|
990 DISPLAYLEVEL(3, "list %u best segments \n", nb-1); |
b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30434
diff
changeset
|
991 for (u=1; u<nb; u++) { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
992 unsigned const pos = dictList[u].pos; |
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
993 unsigned const length = dictList[u].length; |
30822
b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30434
diff
changeset
|
994 U32 const printedLength = MIN(40, length); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
995 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
996 free(dictList); |
30822
b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30434
diff
changeset
|
997 return ERROR(GENERIC); /* should never happen */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
998 } |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
999 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1000 u, length, pos, (unsigned)dictList[u].savings); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1001 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1002 DISPLAYLEVEL(3, "| \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1003 } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1004 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1005 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1006 /* create dictionary */ |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1007 { unsigned dictContentSize = ZDICT_dictSize(dictList); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1008 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1009 if (dictContentSize < targetDictSize/4) { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1010 DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1011 if (samplesBuffSize < 10 * targetDictSize) |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1012 DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20)); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1013 if (minRep > MINRATIO) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1014 DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1015 DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1016 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1017 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1018 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1019 if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1020 unsigned proposedSelectivity = selectivity-1; |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1021 while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1022 DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1023 DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1024 DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n"); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1025 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1026 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1027 /* limit dictionary size */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1028 { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1029 U32 currentSize = 0; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1030 U32 n; for (n=1; n<max; n++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1031 currentSize += dictList[n].length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1032 if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1033 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1034 dictList->pos = n; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1035 dictContentSize = currentSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1036 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1037 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1038 /* build dict content */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1039 { U32 u; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1040 BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1041 for (u=1; u<dictList->pos; u++) { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1042 U32 l = dictList[u].length; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1043 ptr -= l; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1044 if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1045 memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1046 } } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1047 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1048 dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1049 samplesBuffer, samplesSizes, nbSamples, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1050 params.zParams); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1051 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1052 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1053 /* clean up */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1054 free(dictList); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1055 return dictSize; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1056 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1057 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1058 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1059 /* ZDICT_trainFromBuffer_legacy() : |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1060 * issue : samplesBuffer need to be followed by a noisy guard band. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1061 * work around : duplicate the buffer, and add the noise */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1062 size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1063 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1064 ZDICT_legacy_params_t params) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1065 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1066 size_t result; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1067 void* newBuff; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1068 size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1069 if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1070 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1071 newBuff = malloc(sBuffSize + NOISELENGTH); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1072 if (!newBuff) return ERROR(memory_allocation); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1073 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1074 memcpy(newBuff, samplesBuffer, sBuffSize); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1075 ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1076 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1077 result = |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1078 ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1079 samplesSizes, nbSamples, params); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1080 free(newBuff); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1081 return result; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1082 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1083 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1084 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1085 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1086 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1087 { |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1088 ZDICT_fastCover_params_t params; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1089 DEBUGLOG(3, "ZDICT_trainFromBuffer"); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1090 memset(¶ms, 0, sizeof(params)); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1091 params.d = 8; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1092 params.steps = 4; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1093 /* Default to level 6 since no compression level information is available */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1094 params.zParams.compressionLevel = 3; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1095 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1096 params.zParams.notificationLevel = DEBUGLEVEL; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1097 #endif |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1098 return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1099 samplesBuffer, samplesSizes, nbSamples, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1100 ¶ms); |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1101 } |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1102 |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1103 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1104 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) |
30434
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1105 { |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1106 ZDICT_params_t params; |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1107 memset(¶ms, 0, sizeof(params)); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1108 return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1109 samplesBuffer, samplesSizes, nbSamples, |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1110 params); |
2e484bdea8c4
zstd: vendor zstd 1.1.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1111 } |