contrib/python-zstandard/zstd/dictBuilder/cover.c
author Gregory Szorc <gregory.szorc@gmail.com>
Sun, 15 Sep 2019 20:04:00 -0700
changeset 42937 69de49c4e39c
parent 42070 675775c33ab6
child 43994 de7838053207
permissions -rw-r--r--
zstandard: vendor python-zstandard 0.12 The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. test-repo-compengines.t was updated to reflect a change in behavior of the zstd library. The project contains a vendored copy of zstandard 1.4.3. The old version was 1.3.8. This should result in some minor performance wins. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D6858
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     1
/*
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     2
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     3
 * All rights reserved.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     4
 *
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     5
 * This source code is licensed under both the BSD-style license (found in the
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     7
 * in the COPYING file in the root directory of this source tree).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
     8
 * You may select, at your option, one of the above-listed licenses.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     9
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    10
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    11
/* *****************************************************************************
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    12
 * Constructs a dictionary using a heuristic based on the following paper:
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    13
 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    14
 * Liao, Petri, Moffat, Wirth
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    15
 * Effective Construction of Relative Lempel-Ziv Dictionaries
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    16
 * Published in WWW 2016.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    17
 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    18
 * Adapted from code originally written by @ot (Giuseppe Ottaviano).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    19
 ******************************************************************************/
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
    20
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    21
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    22
*  Dependencies
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    23
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    24
#include <stdio.h>  /* fprintf */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    25
#include <stdlib.h> /* malloc, free, qsort */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    26
#include <string.h> /* memset */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    27
#include <time.h>   /* clock */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    28
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    29
#include "mem.h" /* read */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    30
#include "pool.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    31
#include "threading.h"
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
    32
#include "cover.h"
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    33
#include "zstd_internal.h" /* includes zstd.h */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    34
#ifndef ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    35
#define ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    36
#endif
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    37
#include "zdict.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    38
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    39
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    40
*  Constants
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    41
***************************************/
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
    42
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
    43
#define DEFAULT_SPLITPOINT 1.0
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    44
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    45
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    46
*  Console display
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    47
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    48
static int g_displayLevel = 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    49
#define DISPLAY(...)                                                           \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    50
  {                                                                            \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    51
    fprintf(stderr, __VA_ARGS__);                                              \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    52
    fflush(stderr);                                                            \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    53
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    54
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    55
  if (displayLevel >= l) {                                                     \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    56
    DISPLAY(__VA_ARGS__);                                                      \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    57
  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    58
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    59
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    60
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    61
  if (displayLevel >= l) {                                                     \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    62
    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    63
      g_time = clock();                                                        \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    64
      DISPLAY(__VA_ARGS__);                                                    \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    65
    }                                                                          \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    66
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    67
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    68
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    69
static clock_t g_time = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    70
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    71
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    72
* Hash table
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    73
***************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    74
* A small specialized hash map for storing activeDmers.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    75
* The map does not resize, so if it becomes full it will loop forever.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    76
* Thus, the map must be large enough to store every value.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    77
* The map implements linear probing and keeps its load less than 0.5.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    78
*/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    79
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    80
#define MAP_EMPTY_VALUE ((U32)-1)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    81
typedef struct COVER_map_pair_t_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    82
  U32 key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    83
  U32 value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    84
} COVER_map_pair_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    85
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    86
typedef struct COVER_map_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    87
  COVER_map_pair_t *data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    88
  U32 sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    89
  U32 size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    90
  U32 sizeMask;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    91
} COVER_map_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    92
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    93
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    94
 * Clear the map.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    95
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    96
static void COVER_map_clear(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    97
  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    98
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    99
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   100
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   101
 * Initializes a map of the given size.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   102
 * Returns 1 on success and 0 on failure.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   103
 * The map must be destroyed with COVER_map_destroy().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   104
 * The map is only guaranteed to be large enough to hold size elements.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   105
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   106
static int COVER_map_init(COVER_map_t *map, U32 size) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   107
  map->sizeLog = ZSTD_highbit32(size) + 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   108
  map->size = (U32)1 << map->sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   109
  map->sizeMask = map->size - 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   110
  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   111
  if (!map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   112
    map->sizeLog = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   113
    map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   114
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   115
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   116
  COVER_map_clear(map);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   117
  return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   118
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   119
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   120
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   121
 * Internal hash function
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   122
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   123
static const U32 prime4bytes = 2654435761U;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   124
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   125
  return (key * prime4bytes) >> (32 - map->sizeLog);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   126
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   127
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   128
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   129
 * Helper function that returns the index that a key should be placed into.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   130
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   131
static U32 COVER_map_index(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   132
  const U32 hash = COVER_map_hash(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   133
  U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   134
  for (i = hash;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   135
    COVER_map_pair_t *pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   136
    if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   137
      return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   138
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   139
    if (pos->key == key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   140
      return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   141
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   142
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   143
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   144
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   145
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   146
 * Returns the pointer to the value for key.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   147
 * If key is not in the map, it is inserted and the value is set to 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   148
 * The map must not be full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   149
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   150
static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   151
  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   152
  if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   153
    pos->key = key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   154
    pos->value = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   155
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   156
  return &pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   157
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   158
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   159
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   160
 * Deletes key from the map if present.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   161
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   162
static void COVER_map_remove(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   163
  U32 i = COVER_map_index(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   164
  COVER_map_pair_t *del = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   165
  U32 shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   166
  if (del->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   167
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   168
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   169
  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   170
    COVER_map_pair_t *const pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   171
    /* If the position is empty we are done */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   172
    if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   173
      del->value = MAP_EMPTY_VALUE;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   174
      return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   175
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   176
    /* If pos can be moved to del do so */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   177
    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   178
      del->key = pos->key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   179
      del->value = pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   180
      del = pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   181
      shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   182
    } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   183
      ++shift;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   184
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   185
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   186
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   187
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   188
/**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   189
 * Destroys a map that is inited with COVER_map_init().
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   190
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   191
static void COVER_map_destroy(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   192
  if (map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   193
    free(map->data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   194
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   195
  map->data = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   196
  map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   197
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   198
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   199
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   200
* Context
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   201
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   202
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   203
typedef struct {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   204
  const BYTE *samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   205
  size_t *offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   206
  const size_t *samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   207
  size_t nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   208
  size_t nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   209
  size_t nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   210
  U32 *suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   211
  size_t suffixSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   212
  U32 *freqs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   213
  U32 *dmerAt;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   214
  unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   215
} COVER_ctx_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   216
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   217
/* We need a global context for qsort... */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   218
static COVER_ctx_t *g_ctx = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   219
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   220
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   221
*  Helper functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   222
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   223
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   224
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   225
 * Returns the sum of the sample sizes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   226
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   227
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   228
  size_t sum = 0;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   229
  unsigned i;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   230
  for (i = 0; i < nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   231
    sum += samplesSizes[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   232
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   233
  return sum;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   234
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   235
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   236
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   237
 * Returns -1 if the dmer at lp is less than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   238
 * Return 0 if the dmers at lp and rp are equal.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   239
 * Returns 1 if the dmer at lp is greater than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   240
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   241
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   242
  U32 const lhs = *(U32 const *)lp;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   243
  U32 const rhs = *(U32 const *)rp;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   244
  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   245
}
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   246
/**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   247
 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   248
 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   249
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   250
  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   251
  U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   252
  U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   253
  if (lhs < rhs) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   254
    return -1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   255
  }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   256
  return (lhs > rhs);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   257
}
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   258
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   259
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   260
 * Same as COVER_cmp() except ties are broken by pointer value
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   261
 * NOTE: g_ctx must be set to call this function.  A global is required because
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   262
 * qsort doesn't take an opaque pointer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   263
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   264
static int COVER_strict_cmp(const void *lp, const void *rp) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   265
  int result = COVER_cmp(g_ctx, lp, rp);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   266
  if (result == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   267
    result = lp < rp ? -1 : 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   268
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   269
  return result;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   270
}
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   271
/**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   272
 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   273
 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   274
static int COVER_strict_cmp8(const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   275
  int result = COVER_cmp8(g_ctx, lp, rp);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   276
  if (result == 0) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   277
    result = lp < rp ? -1 : 1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   278
  }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   279
  return result;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   280
}
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   281
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   282
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   283
 * Returns the first pointer in [first, last) whose element does not compare
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   284
 * less than value.  If no such element exists it returns last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   285
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   286
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   287
                                       size_t value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   288
  size_t count = last - first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   289
  while (count != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   290
    size_t step = count / 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   291
    const size_t *ptr = first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   292
    ptr += step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   293
    if (*ptr < value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   294
      first = ++ptr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   295
      count -= step + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   296
    } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   297
      count = step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   298
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   299
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   300
  return first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   301
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   302
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   303
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   304
 * Generic groupBy function.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   305
 * Groups an array sorted by cmp into groups with equivalent values.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   306
 * Calls grp for each group.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   307
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   308
static void
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   309
COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   310
              int (*cmp)(COVER_ctx_t *, const void *, const void *),
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   311
              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   312
  const BYTE *ptr = (const BYTE *)data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   313
  size_t num = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   314
  while (num < count) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   315
    const BYTE *grpEnd = ptr + size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   316
    ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   317
    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   318
      grpEnd += size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   319
      ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   320
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   321
    grp(ctx, ptr, grpEnd);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   322
    ptr = grpEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   323
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   324
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   325
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   326
/*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   327
*  Cover functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   328
***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   329
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   330
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   331
 * Called on each group of positions with the same dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   332
 * Counts the frequency of each dmer and saves it in the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   333
 * Fills `ctx->dmerAt`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   334
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   335
static void COVER_group(COVER_ctx_t *ctx, const void *group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   336
                        const void *groupEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   337
  /* The group consists of all the positions with the same first d bytes. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   338
  const U32 *grpPtr = (const U32 *)group;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   339
  const U32 *grpEnd = (const U32 *)groupEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   340
  /* The dmerId is how we will reference this dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   341
   * This allows us to map the whole dmer space to a much smaller space, the
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   342
   * size of the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   343
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   344
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   345
  /* Count the number of samples this dmer shows up in */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   346
  U32 freq = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   347
  /* Details */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   348
  const size_t *curOffsetPtr = ctx->offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   349
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   350
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   351
   * different sample than the last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   352
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   353
  size_t curSampleEnd = ctx->offsets[0];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   354
  for (; grpPtr != grpEnd; ++grpPtr) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   355
    /* Save the dmerId for this position so we can get back to it. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   356
    ctx->dmerAt[*grpPtr] = dmerId;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   357
    /* Dictionaries only help for the first reference to the dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   358
     * After that zstd can reference the match from the previous reference.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   359
     * So only count each dmer once for each sample it is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   360
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   361
    if (*grpPtr < curSampleEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   362
      continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   363
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   364
    freq += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   365
    /* Binary search to find the end of the sample *grpPtr is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   366
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   367
     * search because the loop is over.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   368
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   369
    if (grpPtr + 1 != grpEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   370
      const size_t *sampleEndPtr =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   371
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   372
      curSampleEnd = *sampleEndPtr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   373
      curOffsetPtr = sampleEndPtr + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   374
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   375
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   376
  /* At this point we are never going to look at this segment of the suffix
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   377
   * array again.  We take advantage of this fact to save memory.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   378
   * We store the frequency of the dmer in the first position of the group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   379
   * which is dmerId.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   380
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   381
  ctx->suffix[dmerId] = freq;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   382
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   383
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   384
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   385
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   386
 * Selects the best segment in an epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   387
 * Segments of are scored according to the function:
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   388
 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   389
 * Let F(d) be the frequency of dmer d.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   390
 * Let S_i be the dmer at position i of segment S which has length k.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   391
 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   392
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   393
 *
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   394
 * Once the dmer d is in the dictionary we set F(d) = 0.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   395
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   396
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   397
                                           COVER_map_t *activeDmers, U32 begin,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   398
                                           U32 end,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   399
                                           ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   400
  /* Constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   401
  const U32 k = parameters.k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   402
  const U32 d = parameters.d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   403
  const U32 dmersInK = k - d + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   404
  /* Try each segment (activeSegment) and save the best (bestSegment) */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   405
  COVER_segment_t bestSegment = {0, 0, 0};
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   406
  COVER_segment_t activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   407
  /* Reset the activeDmers in the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   408
  COVER_map_clear(activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   409
  /* The activeSegment starts at the beginning of the epoch. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   410
  activeSegment.begin = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   411
  activeSegment.end = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   412
  activeSegment.score = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   413
  /* Slide the activeSegment through the whole epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   414
   * Save the best segment in bestSegment.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   415
   */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   416
  while (activeSegment.end < end) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   417
    /* The dmerId for the dmer at the next position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   418
    U32 newDmer = ctx->dmerAt[activeSegment.end];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   419
    /* The entry in activeDmers for this dmerId */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   420
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   421
    /* If the dmer isn't already present in the segment add its score. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   422
    if (*newDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   423
      /* The paper suggest using the L-0.5 norm, but experiments show that it
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   424
       * doesn't help.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   425
       */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   426
      activeSegment.score += freqs[newDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   427
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   428
    /* Add the dmer to the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   429
    activeSegment.end += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   430
    *newDmerOcc += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   431
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   432
    /* If the window is now too large, drop the first position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   433
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   434
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   435
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   436
      activeSegment.begin += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   437
      *delDmerOcc -= 1;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   438
      /* If this is the last occurrence of the dmer, subtract its score */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   439
      if (*delDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   440
        COVER_map_remove(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   441
        activeSegment.score -= freqs[delDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   442
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   443
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   444
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   445
    /* If this segment is the best so far save it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   446
    if (activeSegment.score > bestSegment.score) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   447
      bestSegment = activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   448
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   449
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   450
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   451
    /* Trim off the zero frequency head and tail from the segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   452
    U32 newBegin = bestSegment.end;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   453
    U32 newEnd = bestSegment.begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   454
    U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   455
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   456
      U32 freq = freqs[ctx->dmerAt[pos]];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   457
      if (freq != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   458
        newBegin = MIN(newBegin, pos);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   459
        newEnd = pos + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   460
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   461
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   462
    bestSegment.begin = newBegin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   463
    bestSegment.end = newEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   464
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   465
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   466
    /* Zero out the frequency of each dmer covered by the chosen segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   467
    U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   468
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   469
      freqs[ctx->dmerAt[pos]] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   470
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   471
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   472
  return bestSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   473
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   474
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   475
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   476
 * Check the validity of the parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   477
 * Returns non-zero if the parameters are valid and 0 otherwise.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   478
 */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   479
static int COVER_checkParameters(ZDICT_cover_params_t parameters,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   480
                                 size_t maxDictSize) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   481
  /* k and d are required parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   482
  if (parameters.d == 0 || parameters.k == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   483
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   484
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   485
  /* k <= maxDictSize */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   486
  if (parameters.k > maxDictSize) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   487
    return 0;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   488
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   489
  /* d <= k */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   490
  if (parameters.d > parameters.k) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   491
    return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   492
  }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   493
  /* 0 < splitPoint <= 1 */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   494
  if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   495
    return 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   496
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   497
  return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   498
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   499
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   500
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   501
 * Clean up a context initialized with `COVER_ctx_init()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   502
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   503
static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   504
  if (!ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   505
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   506
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   507
  if (ctx->suffix) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   508
    free(ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   509
    ctx->suffix = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   510
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   511
  if (ctx->freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   512
    free(ctx->freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   513
    ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   514
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   515
  if (ctx->dmerAt) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   516
    free(ctx->dmerAt);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   517
    ctx->dmerAt = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   518
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   519
  if (ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   520
    free(ctx->offsets);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   521
    ctx->offsets = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   522
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   523
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   524
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   525
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   526
 * Prepare a context for dictionary building.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   527
 * The context is only dependent on the parameter `d` and can used multiple
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   528
 * times.
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   529
 * Returns 0 on success or error code on error.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   530
 * The context must be destroyed with `COVER_ctx_destroy()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   531
 */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   532
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   533
                          const size_t *samplesSizes, unsigned nbSamples,
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   534
                          unsigned d, double splitPoint) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   536
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   537
  /* Split samples into testing and training sets */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   538
  const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   539
  const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   540
  const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   541
  const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   542
  /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   543
  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   544
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   545
    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   546
                 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   547
    return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   548
  }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   549
  /* Check if there are at least 5 training samples */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   550
  if (nbTrainSamples < 5) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   551
    DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   552
    return ERROR(srcSize_wrong);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   553
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   554
  /* Check if there's testing sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   555
  if (nbTestSamples < 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   556
    DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   557
    return ERROR(srcSize_wrong);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   558
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   559
  /* Zero the context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   560
  memset(ctx, 0, sizeof(*ctx));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   561
  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   562
               (unsigned)trainingSamplesSize);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   563
  DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   564
               (unsigned)testSamplesSize);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   565
  ctx->samples = samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   566
  ctx->samplesSizes = samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   567
  ctx->nbSamples = nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   568
  ctx->nbTrainSamples = nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   569
  ctx->nbTestSamples = nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   570
  /* Partial suffix array */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   571
  ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   572
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   573
  /* Maps index to the dmerID */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   574
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   575
  /* The offsets of each file */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   576
  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   578
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   579
    COVER_ctx_destroy(ctx);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   580
    return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   581
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   582
  ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   583
  ctx->d = d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   584
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   585
  /* Fill offsets from the samplesSizes */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   586
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   587
    U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   588
    ctx->offsets[0] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   589
    for (i = 1; i <= nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   590
      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   591
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   592
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   593
  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   594
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   595
    /* suffix is a partial suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   596
     * It only sorts suffixes by their first parameters.d bytes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   597
     * The sort is stable, so each dmer group is sorted by position in input.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   598
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   599
    U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   600
    for (i = 0; i < ctx->suffixSize; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   601
      ctx->suffix[i] = i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   602
    }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   603
    /* qsort doesn't take an opaque pointer, so pass as a global.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   604
     * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   605
     */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   606
    g_ctx = ctx;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   607
#if defined(__OpenBSD__)
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   608
    mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   609
          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   610
#else
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   611
    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   612
          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   613
#endif
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   614
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   615
  DISPLAYLEVEL(2, "Computing frequencies\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   616
  /* For each dmer group (group of positions with the same first d bytes):
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   617
   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   618
   *    (groupBeginPtr - suffix).  This allows us to go from position to
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   619
   *    dmerID so we can look up values in freq.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   620
   * 2. We calculate how many samples the dmer occurs in and save it in
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   621
   *    freqs[dmerId].
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   622
   */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   623
  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   624
                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   625
  ctx->freqs = ctx->suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   626
  ctx->suffix = NULL;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   627
  return 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   628
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   629
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   630
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   631
{
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   632
  const double ratio = (double)nbDmers / maxDictSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   633
  if (ratio >= 10) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   634
      return;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   635
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   636
  LOCALDISPLAYLEVEL(displayLevel, 1,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   637
                    "WARNING: The maximum dictionary size %u is too large "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   638
                    "compared to the source size %u! "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   639
                    "size(source)/size(dictionary) = %f, but it should be >= "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   640
                    "10! This may lead to a subpar dictionary! We recommend "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   641
                    "training on sources at least 10x, and up to 100x the "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   642
                    "size of the dictionary!\n", (U32)maxDictSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   643
                    (U32)nbDmers, ratio);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   644
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   645
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   646
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   647
                                       U32 nbDmers, U32 k, U32 passes)
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   648
{
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   649
  const U32 minEpochSize = k * 10;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   650
  COVER_epoch_info_t epochs;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   651
  epochs.num = MAX(1, maxDictSize / k / passes);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   652
  epochs.size = nbDmers / epochs.num;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   653
  if (epochs.size >= minEpochSize) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   654
      assert(epochs.size * epochs.num <= nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   655
      return epochs;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   656
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   657
  epochs.size = MIN(minEpochSize, nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   658
  epochs.num = nbDmers / epochs.size;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   659
  assert(epochs.size * epochs.num <= nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   660
  return epochs;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   661
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   662
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   663
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   664
 * Given the prepared context build the dictionary.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   665
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   666
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   667
                                    COVER_map_t *activeDmers, void *dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   668
                                    size_t dictBufferCapacity,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   669
                                    ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   670
  BYTE *const dict = (BYTE *)dictBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   671
  size_t tail = dictBufferCapacity;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   672
  /* Divide the data into epochs. We will select one segment from each epoch. */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   673
  const COVER_epoch_info_t epochs = COVER_computeEpochs(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   674
      (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   675
  const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   676
  size_t zeroScoreRun = 0;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   677
  size_t epoch;
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   678
  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   679
                (U32)epochs.num, (U32)epochs.size);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   680
  /* Loop through the epochs until there are no more segments or the dictionary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   681
   * is full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   682
   */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   683
  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   684
    const U32 epochBegin = (U32)(epoch * epochs.size);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   685
    const U32 epochEnd = epochBegin + epochs.size;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   686
    size_t segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   687
    /* Select a segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   688
    COVER_segment_t segment = COVER_selectSegment(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   689
        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   690
    /* If the segment covers no dmers, then we are out of content.
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   691
     * There may be new content in other epochs, for continue for some time.
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   692
     */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   693
    if (segment.score == 0) {
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   694
      if (++zeroScoreRun >= maxZeroScoreRun) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   695
          break;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   696
      }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   697
      continue;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   698
    }
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   699
    zeroScoreRun = 0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   700
    /* Trim the segment if necessary and if it is too small then we are done */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   701
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   702
    if (segmentSize < parameters.d) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   703
      break;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   704
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   705
    /* We fill the dictionary from the back to allow the best segments to be
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   706
     * referenced with the smallest offsets.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   707
     */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   708
    tail -= segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   709
    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   710
    DISPLAYUPDATE(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   711
        2, "\r%u%%       ",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   712
        (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   713
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   714
  DISPLAYLEVEL(2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   715
  return tail;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   716
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   717
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   718
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   719
    void *dictBuffer, size_t dictBufferCapacity,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   720
    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   721
    ZDICT_cover_params_t parameters)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   722
{
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   723
  BYTE* const dict = (BYTE*)dictBuffer;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   724
  COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   725
  COVER_map_t activeDmers;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   726
  parameters.splitPoint = 1.0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   727
  /* Initialize global data */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   728
  g_displayLevel = parameters.zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   729
  /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   730
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   731
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   732
    return ERROR(parameter_outOfBound);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   733
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   734
  if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   735
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   736
    return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   737
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   738
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   739
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   740
                 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   741
    return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   742
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   743
  /* Initialize context and activeDmers */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   744
  {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   745
    size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   746
                      parameters.d, parameters.splitPoint);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   747
    if (ZSTD_isError(initVal)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   748
      return initVal;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   749
    }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   750
  }
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   751
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   752
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   753
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   754
    COVER_ctx_destroy(&ctx);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   755
    return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   756
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   757
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   758
  DISPLAYLEVEL(2, "Building dictionary\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   759
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   760
    const size_t tail =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   761
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   762
                              dictBufferCapacity, parameters);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   763
    const size_t dictionarySize = ZDICT_finalizeDictionary(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   764
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   765
        samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   766
    if (!ZSTD_isError(dictionarySize)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   767
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   768
                   (unsigned)dictionarySize);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   769
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   770
    COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   771
    COVER_map_destroy(&activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   772
    return dictionarySize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   773
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   774
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   775
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   776
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   777
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   778
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   779
                                    const size_t *samplesSizes, const BYTE *samples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   780
                                    size_t *offsets,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   781
                                    size_t nbTrainSamples, size_t nbSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   782
                                    BYTE *const dict, size_t dictBufferCapacity) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   783
  size_t totalCompressedSize = ERROR(GENERIC);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   784
  /* Pointers */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   785
  ZSTD_CCtx *cctx;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   786
  ZSTD_CDict *cdict;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   787
  void *dst;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   788
  /* Local variables */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   789
  size_t dstCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   790
  size_t i;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   791
  /* Allocate dst with enough space to compress the maximum sized sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   792
  {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   793
    size_t maxSampleSize = 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   794
    i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   795
    for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   796
      maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   797
    }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   798
    dstCapacity = ZSTD_compressBound(maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   799
    dst = malloc(dstCapacity);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   800
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   801
  /* Create the cctx and cdict */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   802
  cctx = ZSTD_createCCtx();
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   803
  cdict = ZSTD_createCDict(dict, dictBufferCapacity,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   804
                           parameters.zParams.compressionLevel);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   805
  if (!dst || !cctx || !cdict) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   806
    goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   807
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   808
  /* Compress each sample and sum their sizes (or error) */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   809
  totalCompressedSize = dictBufferCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   810
  i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   811
  for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   812
    const size_t size = ZSTD_compress_usingCDict(
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   813
        cctx, dst, dstCapacity, samples + offsets[i],
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   814
        samplesSizes[i], cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   815
    if (ZSTD_isError(size)) {
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   816
      totalCompressedSize = size;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   817
      goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   818
    }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   819
    totalCompressedSize += size;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   820
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   821
_compressCleanup:
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   822
  ZSTD_freeCCtx(cctx);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   823
  ZSTD_freeCDict(cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   824
  if (dst) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   825
    free(dst);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   826
  }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   827
  return totalCompressedSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   828
}
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   829
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   830
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   831
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   832
 * Initialize the `COVER_best_t`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   833
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   834
void COVER_best_init(COVER_best_t *best) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   835
  if (best==NULL) return; /* compatible with init on NULL */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   836
  (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   837
  (void)ZSTD_pthread_cond_init(&best->cond, NULL);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   838
  best->liveJobs = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   839
  best->dict = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   840
  best->dictSize = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   841
  best->compressedSize = (size_t)-1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   842
  memset(&best->parameters, 0, sizeof(best->parameters));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   843
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   844
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   845
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   846
 * Wait until liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   847
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   848
void COVER_best_wait(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   849
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   850
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   851
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   852
  ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   853
  while (best->liveJobs != 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   854
    ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   855
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   856
  ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   857
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   858
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   859
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   860
 * Call COVER_best_wait() and then destroy the COVER_best_t.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   861
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   862
void COVER_best_destroy(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   863
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   864
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   865
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   866
  COVER_best_wait(best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   867
  if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   868
    free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   869
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   870
  ZSTD_pthread_mutex_destroy(&best->mutex);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   871
  ZSTD_pthread_cond_destroy(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   872
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   873
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   874
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   875
 * Called when a thread is about to be launched.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   876
 * Increments liveJobs.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   877
 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   878
void COVER_best_start(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   879
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   880
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   881
  }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   882
  ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   883
  ++best->liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   884
  ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   885
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   886
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   887
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   888
 * Called when a thread finishes executing, both on error or success.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   889
 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   890
 * If this dictionary is the best so far save it and its parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   891
 */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   892
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   893
                              COVER_dictSelection_t selection) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   894
  void* dict = selection.dictContent;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   895
  size_t compressedSize = selection.totalCompressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   896
  size_t dictSize = selection.dictSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   897
  if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   898
    return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   899
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   900
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   901
    size_t liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   902
    ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   903
    --best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   904
    liveJobs = best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   905
    /* If the new dictionary is better */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   906
    if (compressedSize < best->compressedSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   907
      /* Allocate space if necessary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   908
      if (!best->dict || best->dictSize < dictSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   909
        if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   910
          free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   911
        }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   912
        best->dict = malloc(dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   913
        if (!best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   914
          best->compressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   915
          best->dictSize = 0;
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   916
          ZSTD_pthread_cond_signal(&best->cond);
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
   917
          ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   918
          return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   919
        }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   920
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   921
      /* Save the dictionary, parameters, and size */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   922
      if (dict) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   923
        memcpy(best->dict, dict, dictSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   924
        best->dictSize = dictSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   925
        best->parameters = parameters;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   926
        best->compressedSize = compressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   927
      }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   928
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   929
    if (liveJobs == 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
   930
      ZSTD_pthread_cond_broadcast(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   931
    }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
   932
    ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   933
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   934
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
   935
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   936
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   937
    COVER_dictSelection_t selection = { NULL, 0, error };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   938
    return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   939
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   940
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   941
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   942
  return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   943
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   944
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   945
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   946
  free(selection.dictContent);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   947
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   948
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   949
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   950
        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   951
        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   952
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   953
  size_t largestDict = 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   954
  size_t largestCompressed = 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   955
  BYTE* customDictContentEnd = customDictContent + dictContentSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   956
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   957
  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   958
  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   959
  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   960
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   961
  if (!largestDictbuffer || !candidateDictBuffer) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   962
    free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   963
    free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   964
    return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   965
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   966
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   967
  /* Initial dictionary size and compressed size */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   968
  memcpy(largestDictbuffer, customDictContent, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   969
  dictContentSize = ZDICT_finalizeDictionary(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   970
    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   971
    samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   972
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   973
  if (ZDICT_isError(dictContentSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   974
    free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   975
    free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   976
    return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   977
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   978
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   979
  totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   980
                                                       samplesBuffer, offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   981
                                                       nbCheckSamples, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   982
                                                       largestDictbuffer, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   983
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   984
  if (ZSTD_isError(totalCompressedSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   985
    free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   986
    free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   987
    return COVER_dictSelectionError(totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   988
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   989
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   990
  if (params.shrinkDict == 0) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   991
    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   992
    free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   993
    return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   994
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   995
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   996
  largestDict = dictContentSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   997
  largestCompressed = totalCompressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   998
  dictContentSize = ZDICT_DICTSIZE_MIN;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
   999
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1000
  /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1001
  while (dictContentSize < largestDict) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1002
    memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1003
    dictContentSize = ZDICT_finalizeDictionary(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1004
      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1005
      samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1006
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1007
    if (ZDICT_isError(dictContentSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1008
      free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1009
      free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1010
      return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1011
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1012
    }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1013
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1014
    totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1015
                                                         samplesBuffer, offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1016
                                                         nbCheckSamples, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1017
                                                         candidateDictBuffer, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1018
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1019
    if (ZSTD_isError(totalCompressedSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1020
      free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1021
      free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1022
      return COVER_dictSelectionError(totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1023
    }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1024
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1025
    if (totalCompressedSize <= largestCompressed * regressionTolerance) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1026
      COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1027
      free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1028
      return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1029
    }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1030
    dictContentSize *= 2;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1031
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1032
  dictContentSize = largestDict;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1033
  totalCompressedSize = largestCompressed;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1034
  {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1035
    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1036
    free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1037
    return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1038
  }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1039
}
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1040
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1041
/**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1042
 * Parameters for COVER_tryParameters().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1043
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1044
typedef struct COVER_tryParameters_data_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1045
  const COVER_ctx_t *ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1046
  COVER_best_t *best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1047
  size_t dictBufferCapacity;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1048
  ZDICT_cover_params_t parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1049
} COVER_tryParameters_data_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1050
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1051
/**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1052
 * Tries a set of parameters and updates the COVER_best_t with the results.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1053
 * This function is thread safe if zstd is compiled with multithreaded support.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1054
 * It takes its parameters as an *OWNING* opaque pointer to support threading.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1055
 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1056
static void COVER_tryParameters(void *opaque) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1057
  /* Save parameters as local variables */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1058
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1059
  const COVER_ctx_t *const ctx = data->ctx;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1060
  const ZDICT_cover_params_t parameters = data->parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1061
  size_t dictBufferCapacity = data->dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1062
  size_t totalCompressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1063
  /* Allocate space for hash table, dict, and freqs */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1064
  COVER_map_t activeDmers;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1065
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1066
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1067
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1068
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1069
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1070
    goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1071
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1072
  if (!dict || !freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1073
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1074
    goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1075
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1076
  /* Copy the frequencies because we need to modify them */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1077
  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1078
  /* Build the dictionary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1079
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1080
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1081
                                              dictBufferCapacity, parameters);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1082
    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1083
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1084
        totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1085
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1086
    if (COVER_dictSelectionIsError(selection)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1087
      DISPLAYLEVEL(1, "Failed to select dictionary\n");
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1088
      goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1089
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1090
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1091
_cleanup:
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1092
  free(dict);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1093
  COVER_best_finish(data->best, parameters, selection);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1094
  free(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1095
  COVER_map_destroy(&activeDmers);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1096
  COVER_dictSelectionFree(selection);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1097
  if (freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1098
    free(freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1099
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1100
}
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1101
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1102
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1103
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1104
    const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1105
    ZDICT_cover_params_t *parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1106
  /* constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1107
  const unsigned nbThreads = parameters->nbThreads;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1108
  const double splitPoint =
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1109
      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1110
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1111
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1112
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1113
  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1114
  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1115
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1116
  const unsigned kIterations =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1117
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1118
  const unsigned shrinkDict = 0;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1119
  /* Local variables */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1120
  const int displayLevel = parameters->zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1121
  unsigned iteration = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1122
  unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1123
  unsigned k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1124
  COVER_best_t best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1125
  POOL_ctx *pool = NULL;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1126
  int warned = 0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1127
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1128
  /* Checks */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1129
  if (splitPoint <= 0 || splitPoint > 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1130
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1131
    return ERROR(parameter_outOfBound);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1132
  }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1133
  if (kMinK < kMaxD || kMaxK < kMinK) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1134
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1135
    return ERROR(parameter_outOfBound);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1136
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1137
  if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1138
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1139
    return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1140
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1141
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1142
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1143
                 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1144
    return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1145
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1146
  if (nbThreads > 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1147
    pool = POOL_create(nbThreads, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1148
    if (!pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1149
      return ERROR(memory_allocation);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1150
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1151
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1152
  /* Initialization */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1153
  COVER_best_init(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1154
  /* Turn down global display level to clean up display at level 2 and below */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1155
  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1156
  /* Loop through d first because each new value needs a new context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1157
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1158
                    kIterations);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1159
  for (d = kMinD; d <= kMaxD; d += 2) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1160
    /* Initialize the context for this value of d */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1161
    COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1162
    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1163
    {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1164
      const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1165
      if (ZSTD_isError(initVal)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1166
        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1167
        COVER_best_destroy(&best);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1168
        POOL_free(pool);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1169
        return initVal;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1170
      }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1171
    }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1172
    if (!warned) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1173
      COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1174
      warned = 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1175
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1176
    /* Loop through k reusing the same context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1177
    for (k = kMinK; k <= kMaxK; k += kStepSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1178
      /* Prepare the arguments */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1179
      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1180
          sizeof(COVER_tryParameters_data_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1181
      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1182
      if (!data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1183
        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1184
        COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1185
        COVER_ctx_destroy(&ctx);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1186
        POOL_free(pool);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1187
        return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1188
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1189
      data->ctx = &ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1190
      data->best = &best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1191
      data->dictBufferCapacity = dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1192
      data->parameters = *parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1193
      data->parameters.k = k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1194
      data->parameters.d = d;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
  1195
      data->parameters.splitPoint = splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1196
      data->parameters.steps = kSteps;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
  1197
      data->parameters.shrinkDict = shrinkDict;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1198
      data->parameters.zParams.notificationLevel = g_displayLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1199
      /* Check the parameters */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1200
      if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1201
        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1202
        free(data);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1203
        continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1204
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1205
      /* Call the function and pass ownership of data to it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1206
      COVER_best_start(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1207
      if (pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1208
        POOL_add(pool, &COVER_tryParameters, data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1209
      } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1210
        COVER_tryParameters(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1211
      }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1212
      /* Print status */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1213
      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
  1214
                         (unsigned)((iteration * 100) / kIterations));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1215
      ++iteration;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1216
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1217
    COVER_best_wait(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1218
    COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1219
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1220
  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1221
  /* Fill the output buffer and parameters with output of the best parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1222
  {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1223
    const size_t dictSize = best.dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1224
    if (ZSTD_isError(best.compressedSize)) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1225
      const size_t compressedSize = best.compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1226
      COVER_best_destroy(&best);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1227
      POOL_free(pool);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
  1228
      return compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1229
    }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1230
    *parameters = best.parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1231
    memcpy(dictBuffer, best.dict, dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1232
    COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1233
    POOL_free(pool);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1234
    return dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1235
  }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
  1236
}