contrib/python-zstandard/zstd/dictBuilder/zdict.h
changeset 30895 c32454d69b85
parent 30434 2e484bdea8c4
child 37495 b1fb341d8a61
equal deleted inserted replaced
30894:5b60464efbde 30895:c32454d69b85
    17 
    17 
    18 /*======  Dependencies  ======*/
    18 /*======  Dependencies  ======*/
    19 #include <stddef.h>  /* size_t */
    19 #include <stddef.h>  /* size_t */
    20 
    20 
    21 
    21 
    22 /*======  Export for Windows  ======*/
    22 /* =====   ZDICTLIB_API : control library symbols visibility   ===== */
    23 /*!
    23 #if defined(__GNUC__) && (__GNUC__ >= 4)
    24 *  ZSTD_DLL_EXPORT :
    24 #  define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
    25 *  Enable exporting of functions when building a Windows DLL
       
    26 */
       
    27 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
       
    28 #  define ZDICTLIB_API __declspec(dllexport)
       
    29 #else
    25 #else
    30 #  define ZDICTLIB_API
    26 #  define ZDICTLIB_VISIBILITY
       
    27 #endif
       
    28 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
       
    29 #  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
       
    30 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
       
    31 #  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* Not required, but it lets the compiler generate better code, saving a function pointer load from the IAT and an indirect jump. */
       
    32 #else
       
    33 #  define ZDICTLIB_API ZDICTLIB_VISIBILITY
    31 #endif
    34 #endif
    32 
    35 
    33 
    36 
    34 /*! ZDICT_trainFromBuffer() :
    37 /*! ZDICT_trainFromBuffer() :
    35     Train a dictionary from an array of samples.
    38     Train a dictionary from an array of samples.
    77     `parameters` is optional and can be provided with values set to 0 to mean "default".
    80     `parameters` is optional and can be provided with values set to 0 to mean "default".
    78     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
    81     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
    79               or an error code, which can be tested by ZDICT_isError().
    82               or an error code, which can be tested by ZDICT_isError().
    80     note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
    83     note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
    81 */
    84 */
    82 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
    85 ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
    83                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
    86                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
    84                                 ZDICT_params_t parameters);
    87                                 ZDICT_params_t parameters);
    85 
    88 
    86 
    89 /*! COVER_params_t :
    87 /*! ZDICT_addEntropyTablesFromBuffer() :
    90     For all values 0 means default.
    88 
    91     k and d are the only required parameters.
    89     Given a content-only dictionary (built using any 3rd party algorithm),
    92 */
    90     add entropy tables computed from an array of samples.
    93 typedef struct {
       
    94     unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
       
    95     unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
       
    96     unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
       
    97 
       
    98     unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
       
    99     unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
       
   100     unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
       
   101     int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
       
   102 } COVER_params_t;
       
   103 
       
   104 
       
   105 /*! COVER_trainFromBuffer() :
       
   106     Train a dictionary from an array of samples using the COVER algorithm.
       
   107     Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
       
   108     supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
       
   109     The resulting dictionary will be saved into `dictBuffer`.
       
   110     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
       
   111               or an error code, which can be tested with ZDICT_isError().
       
   112     Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
       
   113     Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
       
   114            It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
       
   115            In general, it's recommended to provide a few thousand samples, but this can vary a lot.
       
   116            It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
       
   117 */
       
   118 ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
       
   119                               const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
       
   120                               COVER_params_t parameters);
       
   121 
       
   122 /*! COVER_optimizeTrainFromBuffer() :
       
   123     The same requirements as above hold for all the parameters except `parameters`.
       
   124     This function tries many parameter combinations and picks the best parameters.
       
   125     `*parameters` is filled with the best parameters found, and the dictionary
       
   126     constructed with those parameters is stored in `dictBuffer`.
       
   127 
       
   128     All of the parameters d, k, steps are optional.
       
   129     If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
       
   130     If steps is zero, its default value (32) is used.
       
   131     If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048].
       
   132 
       
   133     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
       
   134               or an error code, which can be tested with ZDICT_isError().
       
   135               On success `*parameters` contains the parameters selected.
       
   136     Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
       
   137 */
       
   138 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
       
   139                                      const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
       
   140                                      COVER_params_t *parameters);
       
   141 
       
   142 /*! ZDICT_finalizeDictionary() :
       
   143 
       
   144     Given a custom content as a basis for dictionary, and a set of samples,
       
   145     finalize dictionary by adding headers and statistics.
       
   146 
    91     Samples must be stored concatenated in a flat buffer `samplesBuffer`,
   147     Samples must be stored concatenated in a flat buffer `samplesBuffer`,
    92     supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
   148     supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
    93 
   149 
    94     The input dictionary content must be stored *at the end* of `dictBuffer`.
   150     dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
    95     Its size is `dictContentSize`.
   151     dictBufferCapacity must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
    96     The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
   152 
    97     starting from its beginning.
   153     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
    98     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
   154               or an error code, which can be tested by ZDICT_isError().
    99 */
   155     note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
       
   156     note 2 : dictBuffer and customDictContent can overlap
       
   157 */
       
   158 #define ZDICT_CONTENTSIZE_MIN 256
       
   159 #define ZDICT_DICTSIZE_MIN    512
       
   160 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
       
   161                                 const void* customDictContent, size_t dictContentSize,
       
   162                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
       
   163                                 ZDICT_params_t parameters);
       
   164 
       
   165 
       
   166 
       
   167 /* Deprecation warnings */
       
   168 /* It is generally possible to disable deprecation warnings from compiler,
       
   169    for example with -Wno-deprecated-declarations for gcc
       
   170    or _CRT_SECURE_NO_WARNINGS in Visual.
       
   171    Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
       
   172 #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
       
   173 #  define ZDICT_DEPRECATED(message) ZDICTLIB_API   /* disable deprecation warnings */
       
   174 #else
       
   175 #  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
       
   176 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
       
   177 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
       
   178 #  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
       
   179 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
       
   180 #  elif (ZDICT_GCC_VERSION >= 301)
       
   181 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
       
   182 #  elif defined(_MSC_VER)
       
   183 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
       
   184 #  else
       
   185 #    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
       
   186 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API
       
   187 #  endif
       
   188 #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
       
   189 
       
   190 ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
   100 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
   191 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
   101                                         const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
   192                                   const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
   102 
       
   103 
   193 
   104 
   194 
   105 #endif   /* ZDICT_STATIC_LINKING_ONLY */
   195 #endif   /* ZDICT_STATIC_LINKING_ONLY */
   106 
   196 
   107 #if defined (__cplusplus)
   197 #if defined (__cplusplus)