contrib/python-zstandard/zstd/dictBuilder/cover.c
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 08 Oct 2018 16:27:40 -0700
changeset 40121 73fef626dae3
parent 37495 b1fb341d8a61
child 42070 675775c33ab6
permissions -rw-r--r--
zstandard: vendor python-zstandard 0.10.1 This was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There are a pair of bug fixes in 0.10.0 with regards to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     1
/*
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     2
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     3
 * All rights reserved.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     4
 *
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     5
 * This source code is licensed under both the BSD-style license (found in the
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     7
 * in the COPYING file in the root directory of this source tree).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     8
 * You may select, at your option, one of the above-listed licenses.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     9
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    10
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    11
/* *****************************************************************************
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    12
 * Constructs a dictionary using a heuristic based on the following paper:
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    13
 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    14
 * Liao, Petri, Moffat, Wirth
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    15
 * Effective Construction of Relative Lempel-Ziv Dictionaries
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    16
 * Published in WWW 2016.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    17
 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    18
 * Adapted from code originally written by @ot (Giuseppe Ottaviano).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    19
 ******************************************************************************/
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    20
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    21
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    22
*  Dependencies
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    23
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    24
#include <stdio.h>  /* fprintf */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    25
#include <stdlib.h> /* malloc, free, qsort */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    26
#include <string.h> /* memset */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    27
#include <time.h>   /* clock */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    28
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    29
#include "mem.h" /* read */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    30
#include "pool.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    31
#include "threading.h"
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
    32
#include "cover.h"
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    33
#include "zstd_internal.h" /* includes zstd.h */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    34
#ifndef ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    35
#define ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    36
#endif
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    37
#include "zdict.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    38
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    39
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    40
*  Constants
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    41
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    42
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
    43
#define DEFAULT_SPLITPOINT 1.0
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    44
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    45
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    46
*  Console display
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    47
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    48
static int g_displayLevel = 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    49
#define DISPLAY(...)                                                           \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    50
  {                                                                            \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    51
    fprintf(stderr, __VA_ARGS__);                                              \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    52
    fflush(stderr);                                                            \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    53
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    54
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    55
  if (displayLevel >= l) {                                                     \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    56
    DISPLAY(__VA_ARGS__);                                                      \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    57
  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    58
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    59
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    60
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    61
  if (displayLevel >= l) {                                                     \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    62
    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    63
      g_time = clock();                                                        \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    64
      DISPLAY(__VA_ARGS__);                                                    \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    65
    }                                                                          \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    66
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    67
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    68
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    69
static clock_t g_time = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    70
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    71
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    72
* Hash table
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    73
***************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    74
* A small specialized hash map for storing activeDmers.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    75
* The map does not resize, so if it becomes full it will loop forever.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    76
* Thus, the map must be large enough to store every value.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    77
* The map implements linear probing and keeps its load less than 0.5.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    78
*/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    79
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    80
#define MAP_EMPTY_VALUE ((U32)-1)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    81
typedef struct COVER_map_pair_t_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    82
  U32 key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    83
  U32 value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    84
} COVER_map_pair_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    85
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    86
typedef struct COVER_map_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    87
  COVER_map_pair_t *data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    88
  U32 sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    89
  U32 size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    90
  U32 sizeMask;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    91
} COVER_map_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    92
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    93
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    94
 * Clear the map.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    95
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    96
static void COVER_map_clear(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    97
  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    98
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    99
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   100
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   101
 * Initializes a map of the given size.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   102
 * Returns 1 on success and 0 on failure.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   103
 * The map must be destroyed with COVER_map_destroy().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   104
 * The map is only guaranteed to be large enough to hold size elements.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   105
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   106
static int COVER_map_init(COVER_map_t *map, U32 size) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   107
  map->sizeLog = ZSTD_highbit32(size) + 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   108
  map->size = (U32)1 << map->sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   109
  map->sizeMask = map->size - 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   110
  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   111
  if (!map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   112
    map->sizeLog = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   113
    map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   114
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   115
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   116
  COVER_map_clear(map);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   117
  return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   118
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   119
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   120
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   121
 * Internal hash function
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   122
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   123
static const U32 prime4bytes = 2654435761U;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   124
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   125
  return (key * prime4bytes) >> (32 - map->sizeLog);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   126
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   127
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   128
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   129
 * Helper function that returns the index that a key should be placed into.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   130
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   131
static U32 COVER_map_index(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   132
  const U32 hash = COVER_map_hash(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   133
  U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   134
  for (i = hash;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   135
    COVER_map_pair_t *pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   136
    if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   137
      return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   138
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   139
    if (pos->key == key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   140
      return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   141
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   142
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   143
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   144
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   145
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   146
 * Returns the pointer to the value for key.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   147
 * If key is not in the map, it is inserted and the value is set to 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   148
 * The map must not be full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   149
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   150
static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   151
  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   152
  if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   153
    pos->key = key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   154
    pos->value = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   155
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   156
  return &pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   157
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   158
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   159
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   160
 * Deletes key from the map if present.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   161
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   162
static void COVER_map_remove(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   163
  U32 i = COVER_map_index(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   164
  COVER_map_pair_t *del = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   165
  U32 shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   166
  if (del->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   167
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   168
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   169
  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   170
    COVER_map_pair_t *const pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   171
    /* If the position is empty we are done */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   172
    if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   173
      del->value = MAP_EMPTY_VALUE;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   174
      return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   175
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   176
    /* If pos can be moved to del do so */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   177
    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   178
      del->key = pos->key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   179
      del->value = pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   180
      del = pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   181
      shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   182
    } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   183
      ++shift;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   184
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   185
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   186
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   187
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   188
/**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   189
 * Destroys a map that is inited with COVER_map_init().
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   190
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   191
static void COVER_map_destroy(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   192
  if (map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   193
    free(map->data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   194
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   195
  map->data = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   196
  map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   197
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   198
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   199
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   200
* Context
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   201
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   202
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   203
typedef struct {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   204
  const BYTE *samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   205
  size_t *offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   206
  const size_t *samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   207
  size_t nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   208
  size_t nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   209
  size_t nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   210
  U32 *suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   211
  size_t suffixSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   212
  U32 *freqs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   213
  U32 *dmerAt;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   214
  unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   215
} COVER_ctx_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   216
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   217
/* We need a global context for qsort... */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   218
static COVER_ctx_t *g_ctx = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   219
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   220
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   221
*  Helper functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   222
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   223
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   224
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   225
 * Returns the sum of the sample sizes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   226
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   227
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   228
  size_t sum = 0;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   229
  unsigned i;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   230
  for (i = 0; i < nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   231
    sum += samplesSizes[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   232
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   233
  return sum;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   234
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   235
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   236
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   237
 * Returns -1 if the dmer at lp is less than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   238
 * Return 0 if the dmers at lp and rp are equal.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   239
 * Returns 1 if the dmer at lp is greater than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   240
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   241
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   242
  U32 const lhs = *(U32 const *)lp;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   243
  U32 const rhs = *(U32 const *)rp;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   244
  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   245
}
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   246
/**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   247
 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   248
 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   249
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   250
  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   251
  U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   252
  U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   253
  if (lhs < rhs) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   254
    return -1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   255
  }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   256
  return (lhs > rhs);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   257
}
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   258
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   259
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   260
 * Same as COVER_cmp() except ties are broken by pointer value
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   261
 * NOTE: g_ctx must be set to call this function.  A global is required because
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   262
 * qsort doesn't take an opaque pointer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   263
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   264
static int COVER_strict_cmp(const void *lp, const void *rp) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   265
  int result = COVER_cmp(g_ctx, lp, rp);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   266
  if (result == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   267
    result = lp < rp ? -1 : 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   268
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   269
  return result;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   270
}
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   271
/**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   272
 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   273
 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   274
static int COVER_strict_cmp8(const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   275
  int result = COVER_cmp8(g_ctx, lp, rp);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   276
  if (result == 0) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   277
    result = lp < rp ? -1 : 1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   278
  }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   279
  return result;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   280
}
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   281
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   282
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   283
 * Returns the first pointer in [first, last) whose element does not compare
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   284
 * less than value.  If no such element exists it returns last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   285
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   286
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   287
                                       size_t value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   288
  size_t count = last - first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   289
  while (count != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   290
    size_t step = count / 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   291
    const size_t *ptr = first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   292
    ptr += step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   293
    if (*ptr < value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   294
      first = ++ptr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   295
      count -= step + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   296
    } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   297
      count = step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   298
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   299
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   300
  return first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   301
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   302
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   303
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   304
 * Generic groupBy function.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   305
 * Groups an array sorted by cmp into groups with equivalent values.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   306
 * Calls grp for each group.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   307
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   308
static void
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   309
COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   310
              int (*cmp)(COVER_ctx_t *, const void *, const void *),
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   311
              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   312
  const BYTE *ptr = (const BYTE *)data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   313
  size_t num = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   314
  while (num < count) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   315
    const BYTE *grpEnd = ptr + size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   316
    ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   317
    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   318
      grpEnd += size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   319
      ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   320
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   321
    grp(ctx, ptr, grpEnd);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   322
    ptr = grpEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   323
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   324
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   325
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   326
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   327
*  Cover functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   328
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   329
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   330
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   331
 * Called on each group of positions with the same dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   332
 * Counts the frequency of each dmer and saves it in the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   333
 * Fills `ctx->dmerAt`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   334
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   335
static void COVER_group(COVER_ctx_t *ctx, const void *group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   336
                        const void *groupEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   337
  /* The group consists of all the positions with the same first d bytes. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   338
  const U32 *grpPtr = (const U32 *)group;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   339
  const U32 *grpEnd = (const U32 *)groupEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   340
  /* The dmerId is how we will reference this dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   341
   * This allows us to map the whole dmer space to a much smaller space, the
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   342
   * size of the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   343
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   344
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   345
  /* Count the number of samples this dmer shows up in */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   346
  U32 freq = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   347
  /* Details */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   348
  const size_t *curOffsetPtr = ctx->offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   349
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   350
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   351
   * different sample than the last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   352
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   353
  size_t curSampleEnd = ctx->offsets[0];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   354
  for (; grpPtr != grpEnd; ++grpPtr) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   355
    /* Save the dmerId for this position so we can get back to it. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   356
    ctx->dmerAt[*grpPtr] = dmerId;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   357
    /* Dictionaries only help for the first reference to the dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   358
     * After that zstd can reference the match from the previous reference.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   359
     * So only count each dmer once for each sample it is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   360
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   361
    if (*grpPtr < curSampleEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   362
      continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   363
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   364
    freq += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   365
    /* Binary search to find the end of the sample *grpPtr is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   366
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   367
     * search because the loop is over.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   368
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   369
    if (grpPtr + 1 != grpEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   370
      const size_t *sampleEndPtr =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   371
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   372
      curSampleEnd = *sampleEndPtr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   373
      curOffsetPtr = sampleEndPtr + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   374
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   375
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   376
  /* At this point we are never going to look at this segment of the suffix
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   377
   * array again.  We take advantage of this fact to save memory.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   378
   * We store the frequency of the dmer in the first position of the group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   379
   * which is dmerId.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   380
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   381
  ctx->suffix[dmerId] = freq;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   382
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   383
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   384
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   385
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   386
 * Selects the best segment in an epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   387
 * Segments of are scored according to the function:
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   388
 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   389
 * Let F(d) be the frequency of dmer d.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   390
 * Let S_i be the dmer at position i of segment S which has length k.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   391
 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   392
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   393
 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   394
 * Once the dmer d is in the dictionay we set F(d) = 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   395
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   396
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   397
                                           COVER_map_t *activeDmers, U32 begin,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   398
                                           U32 end,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   399
                                           ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   400
  /* Constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   401
  const U32 k = parameters.k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   402
  const U32 d = parameters.d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   403
  const U32 dmersInK = k - d + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   404
  /* Try each segment (activeSegment) and save the best (bestSegment) */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   405
  COVER_segment_t bestSegment = {0, 0, 0};
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   406
  COVER_segment_t activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   407
  /* Reset the activeDmers in the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   408
  COVER_map_clear(activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   409
  /* The activeSegment starts at the beginning of the epoch. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   410
  activeSegment.begin = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   411
  activeSegment.end = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   412
  activeSegment.score = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   413
  /* Slide the activeSegment through the whole epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   414
   * Save the best segment in bestSegment.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   415
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   416
  while (activeSegment.end < end) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   417
    /* The dmerId for the dmer at the next position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   418
    U32 newDmer = ctx->dmerAt[activeSegment.end];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   419
    /* The entry in activeDmers for this dmerId */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   420
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   421
    /* If the dmer isn't already present in the segment add its score. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   422
    if (*newDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   423
      /* The paper suggest using the L-0.5 norm, but experiments show that it
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   424
       * doesn't help.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   425
       */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   426
      activeSegment.score += freqs[newDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   427
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   428
    /* Add the dmer to the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   429
    activeSegment.end += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   430
    *newDmerOcc += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   431
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   432
    /* If the window is now too large, drop the first position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   433
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   434
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   435
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   436
      activeSegment.begin += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   437
      *delDmerOcc -= 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   438
      /* If this is the last occurence of the dmer, subtract its score */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   439
      if (*delDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   440
        COVER_map_remove(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   441
        activeSegment.score -= freqs[delDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   442
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   443
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   444
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   445
    /* If this segment is the best so far save it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   446
    if (activeSegment.score > bestSegment.score) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   447
      bestSegment = activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   448
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   449
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   450
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   451
    /* Trim off the zero frequency head and tail from the segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   452
    U32 newBegin = bestSegment.end;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   453
    U32 newEnd = bestSegment.begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   454
    U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   455
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   456
      U32 freq = freqs[ctx->dmerAt[pos]];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   457
      if (freq != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   458
        newBegin = MIN(newBegin, pos);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   459
        newEnd = pos + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   460
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   461
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   462
    bestSegment.begin = newBegin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   463
    bestSegment.end = newEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   464
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   465
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   466
    /* Zero out the frequency of each dmer covered by the chosen segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   467
    U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   468
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   469
      freqs[ctx->dmerAt[pos]] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   470
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   471
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   472
  return bestSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   473
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   474
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   475
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   476
 * Check the validity of the parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   477
 * Returns non-zero if the parameters are valid and 0 otherwise.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   478
 */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   479
static int COVER_checkParameters(ZDICT_cover_params_t parameters,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   480
                                 size_t maxDictSize) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   481
  /* k and d are required parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   482
  if (parameters.d == 0 || parameters.k == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   483
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   484
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   485
  /* k <= maxDictSize */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   486
  if (parameters.k > maxDictSize) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   487
    return 0;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   488
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   489
  /* d <= k */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   490
  if (parameters.d > parameters.k) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   491
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   492
  }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   493
  /* 0 < splitPoint <= 1 */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   494
  if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   495
    return 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   496
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   497
  return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   498
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   499
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   500
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   501
 * Clean up a context initialized with `COVER_ctx_init()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   502
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   503
static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   504
  if (!ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   505
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   506
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   507
  if (ctx->suffix) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   508
    free(ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   509
    ctx->suffix = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   510
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   511
  if (ctx->freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   512
    free(ctx->freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   513
    ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   514
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   515
  if (ctx->dmerAt) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   516
    free(ctx->dmerAt);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   517
    ctx->dmerAt = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   518
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   519
  if (ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   520
    free(ctx->offsets);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   521
    ctx->offsets = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   522
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   523
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   524
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   525
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   526
 * Prepare a context for dictionary building.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   527
 * The context is only dependent on the parameter `d` and can used multiple
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   528
 * times.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   529
 * Returns 1 on success or zero on error.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   530
 * The context must be destroyed with `COVER_ctx_destroy()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   531
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   532
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   533
                          const size_t *samplesSizes, unsigned nbSamples,
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   534
                          unsigned d, double splitPoint) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   536
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   537
  /* Split samples into testing and training sets */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   538
  const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   539
  const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   540
  const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   541
  const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   542
  /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   543
  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   544
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   545
    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   546
                 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   547
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   548
  }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   549
  /* Check if there are at least 5 training samples */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   550
  if (nbTrainSamples < 5) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   551
    DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   552
    return 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   553
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   554
  /* Check if there's testing sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   555
  if (nbTestSamples < 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   556
    DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   557
    return 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   558
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   559
  /* Zero the context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   560
  memset(ctx, 0, sizeof(*ctx));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   561
  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   562
               (U32)trainingSamplesSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   563
  DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   564
               (U32)testSamplesSize);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   565
  ctx->samples = samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   566
  ctx->samplesSizes = samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   567
  ctx->nbSamples = nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   568
  ctx->nbTrainSamples = nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   569
  ctx->nbTestSamples = nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   570
  /* Partial suffix array */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   571
  ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   572
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   573
  /* Maps index to the dmerID */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   574
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   575
  /* The offsets of each file */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   576
  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   578
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   579
    COVER_ctx_destroy(ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   580
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   581
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   582
  ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   583
  ctx->d = d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   584
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   585
  /* Fill offsets from the samplesSizes */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   586
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   587
    U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   588
    ctx->offsets[0] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   589
    for (i = 1; i <= nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   590
      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   591
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   592
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   593
  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   594
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   595
    /* suffix is a partial suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   596
     * It only sorts suffixes by their first parameters.d bytes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   597
     * The sort is stable, so each dmer group is sorted by position in input.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   598
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   599
    U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   600
    for (i = 0; i < ctx->suffixSize; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   601
      ctx->suffix[i] = i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   602
    }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   603
    /* qsort doesn't take an opaque pointer, so pass as a global.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   604
     * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   605
     */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   606
    g_ctx = ctx;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   607
#if defined(__OpenBSD__)
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   608
    mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   609
          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   610
#else
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   611
    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   612
          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   613
#endif
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   614
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   615
  DISPLAYLEVEL(2, "Computing frequencies\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   616
  /* For each dmer group (group of positions with the same first d bytes):
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   617
   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   618
   *    (groupBeginPtr - suffix).  This allows us to go from position to
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   619
   *    dmerID so we can look up values in freq.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   620
   * 2. We calculate how many samples the dmer occurs in and save it in
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   621
   *    freqs[dmerId].
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   622
   */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   623
  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   624
                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   625
  ctx->freqs = ctx->suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   626
  ctx->suffix = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   627
  return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   628
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   629
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   630
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   631
 * Given the prepared context build the dictionary.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   632
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   633
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   634
                                    COVER_map_t *activeDmers, void *dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   635
                                    size_t dictBufferCapacity,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   636
                                    ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   637
  BYTE *const dict = (BYTE *)dictBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   638
  size_t tail = dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   639
  /* Divide the data up into epochs of equal size.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   640
   * We will select at least one segment from each epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   641
   */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   642
  const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   643
  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   644
  size_t epoch;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   645
  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   646
               epochSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   647
  /* Loop through the epochs until there are no more segments or the dictionary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   648
   * is full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   649
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   650
  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   651
    const U32 epochBegin = (U32)(epoch * epochSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   652
    const U32 epochEnd = epochBegin + epochSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   653
    size_t segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   654
    /* Select a segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   655
    COVER_segment_t segment = COVER_selectSegment(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   656
        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   657
    /* If the segment covers no dmers, then we are out of content */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   658
    if (segment.score == 0) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   659
      break;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   660
    }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   661
    /* Trim the segment if necessary and if it is too small then we are done */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   662
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   663
    if (segmentSize < parameters.d) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   664
      break;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   665
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   666
    /* We fill the dictionary from the back to allow the best segments to be
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   667
     * referenced with the smallest offsets.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   668
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   669
    tail -= segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   670
    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   671
    DISPLAYUPDATE(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   672
        2, "\r%u%%       ",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   673
        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   674
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   675
  DISPLAYLEVEL(2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   676
  return tail;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   677
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   678
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   679
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   680
    void *dictBuffer, size_t dictBufferCapacity,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   681
    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   682
    ZDICT_cover_params_t parameters)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   683
{
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   684
  BYTE* const dict = (BYTE*)dictBuffer;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   685
  COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   686
  COVER_map_t activeDmers;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   687
  parameters.splitPoint = 1.0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   688
  /* Initialize global data */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   689
  g_displayLevel = parameters.zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   690
  /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   691
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   692
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   693
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   694
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   695
  if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   696
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   697
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   698
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   699
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   700
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   701
                 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   702
    return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   703
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   704
  /* Initialize context and activeDmers */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   705
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   706
                      parameters.d, parameters.splitPoint)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   707
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   708
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   709
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   710
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   711
    COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   712
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   713
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   714
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   715
  DISPLAYLEVEL(2, "Building dictionary\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   716
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   717
    const size_t tail =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   718
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   719
                              dictBufferCapacity, parameters);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   720
    const size_t dictionarySize = ZDICT_finalizeDictionary(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   721
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   722
        samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   723
    if (!ZSTD_isError(dictionarySize)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   724
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   725
                   (U32)dictionarySize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   726
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   727
    COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   728
    COVER_map_destroy(&activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   729
    return dictionarySize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   730
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   731
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   732
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   733
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   734
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   735
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   736
                                    const size_t *samplesSizes, const BYTE *samples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   737
                                    size_t *offsets,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   738
                                    size_t nbTrainSamples, size_t nbSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   739
                                    BYTE *const dict, size_t dictBufferCapacity) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   740
  size_t totalCompressedSize = ERROR(GENERIC);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   741
  /* Pointers */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   742
  ZSTD_CCtx *cctx;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   743
  ZSTD_CDict *cdict;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   744
  void *dst;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   745
  /* Local variables */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   746
  size_t dstCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   747
  size_t i;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   748
  /* Allocate dst with enough space to compress the maximum sized sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   749
  {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   750
    size_t maxSampleSize = 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   751
    i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   752
    for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   753
      maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   754
    }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   755
    dstCapacity = ZSTD_compressBound(maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   756
    dst = malloc(dstCapacity);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   757
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   758
  /* Create the cctx and cdict */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   759
  cctx = ZSTD_createCCtx();
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   760
  cdict = ZSTD_createCDict(dict, dictBufferCapacity,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   761
                           parameters.zParams.compressionLevel);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   762
  if (!dst || !cctx || !cdict) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   763
    goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   764
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   765
  /* Compress each sample and sum their sizes (or error) */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   766
  totalCompressedSize = dictBufferCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   767
  i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   768
  for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   769
    const size_t size = ZSTD_compress_usingCDict(
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   770
        cctx, dst, dstCapacity, samples + offsets[i],
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   771
        samplesSizes[i], cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   772
    if (ZSTD_isError(size)) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   773
      totalCompressedSize = ERROR(GENERIC);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   774
      goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   775
    }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   776
    totalCompressedSize += size;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   777
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   778
_compressCleanup:
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   779
  ZSTD_freeCCtx(cctx);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   780
  ZSTD_freeCDict(cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   781
  if (dst) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   782
    free(dst);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   783
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   784
  return totalCompressedSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   785
}
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   786
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   787
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   788
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   789
 * Initialize the `COVER_best_t`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   790
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   791
void COVER_best_init(COVER_best_t *best) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   792
  if (best==NULL) return; /* compatible with init on NULL */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   793
  (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   794
  (void)ZSTD_pthread_cond_init(&best->cond, NULL);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   795
  best->liveJobs = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   796
  best->dict = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   797
  best->dictSize = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   798
  best->compressedSize = (size_t)-1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   799
  memset(&best->parameters, 0, sizeof(best->parameters));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   800
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   801
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   802
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   803
 * Wait until liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   804
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   805
void COVER_best_wait(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   806
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   807
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   808
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   809
  ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   810
  while (best->liveJobs != 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   811
    ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   812
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   813
  ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   814
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   815
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   816
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   817
 * Call COVER_best_wait() and then destroy the COVER_best_t.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   818
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   819
void COVER_best_destroy(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   820
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   821
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   822
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   823
  COVER_best_wait(best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   824
  if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   825
    free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   826
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   827
  ZSTD_pthread_mutex_destroy(&best->mutex);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   828
  ZSTD_pthread_cond_destroy(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   829
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   830
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   831
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   832
 * Called when a thread is about to be launched.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   833
 * Increments liveJobs.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   834
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   835
void COVER_best_start(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   836
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   837
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   838
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   839
  ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   840
  ++best->liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   841
  ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   842
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   843
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   844
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   845
 * Called when a thread finishes executing, both on error or success.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   846
 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   847
 * If this dictionary is the best so far save it and its parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   848
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   849
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   850
                              ZDICT_cover_params_t parameters, void *dict,
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   851
                              size_t dictSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   852
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   853
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   854
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   855
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   856
    size_t liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   857
    ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   858
    --best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   859
    liveJobs = best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   860
    /* If the new dictionary is better */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   861
    if (compressedSize < best->compressedSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   862
      /* Allocate space if necessary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   863
      if (!best->dict || best->dictSize < dictSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   864
        if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   865
          free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   866
        }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   867
        best->dict = malloc(dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   868
        if (!best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   869
          best->compressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   870
          best->dictSize = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   871
          return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   872
        }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   873
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   874
      /* Save the dictionary, parameters, and size */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   875
      memcpy(best->dict, dict, dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   876
      best->dictSize = dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   877
      best->parameters = parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   878
      best->compressedSize = compressedSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   879
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   880
    if (liveJobs == 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   881
      ZSTD_pthread_cond_broadcast(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   882
    }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   883
    ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   884
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   885
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   886
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   887
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   888
 * Parameters for COVER_tryParameters().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   889
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   890
typedef struct COVER_tryParameters_data_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   891
  const COVER_ctx_t *ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   892
  COVER_best_t *best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   893
  size_t dictBufferCapacity;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   894
  ZDICT_cover_params_t parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   895
} COVER_tryParameters_data_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   896
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   897
/**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   898
 * Tries a set of parameters and updates the COVER_best_t with the results.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   899
 * This function is thread safe if zstd is compiled with multithreaded support.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   900
 * It takes its parameters as an *OWNING* opaque pointer to support threading.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   901
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   902
static void COVER_tryParameters(void *opaque) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   903
  /* Save parameters as local variables */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   904
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   905
  const COVER_ctx_t *const ctx = data->ctx;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   906
  const ZDICT_cover_params_t parameters = data->parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   907
  size_t dictBufferCapacity = data->dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   908
  size_t totalCompressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   909
  /* Allocate space for hash table, dict, and freqs */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   910
  COVER_map_t activeDmers;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   911
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   912
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   913
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   914
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   915
    goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   916
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   917
  if (!dict || !freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   918
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   919
    goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   920
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   921
  /* Copy the frequencies because we need to modify them */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   922
  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   923
  /* Build the dictionary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   924
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   925
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   926
                                              dictBufferCapacity, parameters);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   927
    dictBufferCapacity = ZDICT_finalizeDictionary(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   928
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   929
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   930
        parameters.zParams);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   931
    if (ZDICT_isError(dictBufferCapacity)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   932
      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   933
      goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   934
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   935
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   936
  /* Check total compressed size */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   937
  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   938
                                                       ctx->samples, ctx->offsets,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   939
                                                       ctx->nbTrainSamples, ctx->nbSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   940
                                                       dict, dictBufferCapacity);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   941
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   942
_cleanup:
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   943
  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   944
                    dictBufferCapacity);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   945
  free(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   946
  COVER_map_destroy(&activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   947
  if (dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   948
    free(dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   949
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   950
  if (freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   951
    free(freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   952
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   953
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   954
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   955
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   956
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   957
    const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   958
    ZDICT_cover_params_t *parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   959
  /* constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   960
  const unsigned nbThreads = parameters->nbThreads;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   961
  const double splitPoint =
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   962
      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   963
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   964
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   965
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   966
  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   967
  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   968
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   969
  const unsigned kIterations =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   970
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   971
  /* Local variables */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   972
  const int displayLevel = parameters->zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   973
  unsigned iteration = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   974
  unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   975
  unsigned k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   976
  COVER_best_t best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   977
  POOL_ctx *pool = NULL;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   978
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   979
  /* Checks */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   980
  if (splitPoint <= 0 || splitPoint > 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   981
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   982
    return ERROR(GENERIC);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   983
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   984
  if (kMinK < kMaxD || kMaxK < kMinK) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   985
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   986
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   987
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   988
  if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   989
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   990
    return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   991
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   992
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   993
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   994
                 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   995
    return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   996
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   997
  if (nbThreads > 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   998
    pool = POOL_create(nbThreads, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   999
    if (!pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1000
      return ERROR(memory_allocation);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1001
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1002
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1003
  /* Initialization */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1004
  COVER_best_init(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1005
  /* Turn down global display level to clean up display at level 2 and below */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1006
  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1007
  /* Loop through d first because each new value needs a new context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1008
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1009
                    kIterations);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1010
  for (d = kMinD; d <= kMaxD; d += 2) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1011
    /* Initialize the context for this value of d */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1012
    COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1013
    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1014
    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1015
      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1016
      COVER_best_destroy(&best);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1017
      POOL_free(pool);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1018
      return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1019
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1020
    /* Loop through k reusing the same context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1021
    for (k = kMinK; k <= kMaxK; k += kStepSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1022
      /* Prepare the arguments */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1023
      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1024
          sizeof(COVER_tryParameters_data_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1025
      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1026
      if (!data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1027
        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1028
        COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1029
        COVER_ctx_destroy(&ctx);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1030
        POOL_free(pool);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1031
        return ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1032
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1033
      data->ctx = &ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1034
      data->best = &best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1035
      data->dictBufferCapacity = dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1036
      data->parameters = *parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1037
      data->parameters.k = k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1038
      data->parameters.d = d;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1039
      data->parameters.splitPoint = splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1040
      data->parameters.steps = kSteps;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1041
      data->parameters.zParams.notificationLevel = g_displayLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1042
      /* Check the parameters */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1043
      if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1044
        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1045
        free(data);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1046
        continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1047
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1048
      /* Call the function and pass ownership of data to it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1049
      COVER_best_start(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1050
      if (pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1051
        POOL_add(pool, &COVER_tryParameters, data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1052
      } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1053
        COVER_tryParameters(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1054
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1055
      /* Print status */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1056
      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1057
                         (U32)((iteration * 100) / kIterations));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1058
      ++iteration;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1059
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1060
    COVER_best_wait(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1061
    COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1062
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1063
  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1064
  /* Fill the output buffer and parameters with output of the best parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1065
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1066
    const size_t dictSize = best.dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1067
    if (ZSTD_isError(best.compressedSize)) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1068
      const size_t compressedSize = best.compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1069
      COVER_best_destroy(&best);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1070
      POOL_free(pool);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1071
      return compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1072
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1073
    *parameters = best.parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1074
    memcpy(dictBuffer, best.dict, dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1075
    COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1076
    POOL_free(pool);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1077
    return dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1078
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1079
}