# HG changeset patch
# User Martin von Zweigbergk <martinvonz@google.com>
# Date 1487007856 28800
# Node ID 455677a7667f6658be5c6e3f3d7b81a29cf5f9a2
# Parent  72f25e17af9d6a206ea374c30f229ae9513f3f23# Parent  f3807a135e43b0499db12dc79697e0b9589dd095
merge with stable

diff -r f3807a135e43 -r 455677a7667f contrib/check-code.py
--- a/contrib/check-code.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/check-code.py	Mon Feb 13 09:44:16 2017 -0800
@@ -237,7 +237,7 @@
     (r'lambda\s*\(.*,.*\)',
      "tuple parameter unpacking not available in Python 3+"),
     (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
-    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
+    (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
     (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
      'dict-from-generator'),
     (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
diff -r f3807a135e43 -r 455677a7667f contrib/chg/chg.c
--- a/contrib/chg/chg.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/chg/chg.c	Mon Feb 13 09:44:16 2017 -0800
@@ -128,6 +128,24 @@
 		abortmsg("insecure sockdir %s", sockdir);
 }
 
+/*
+ * Check if a socket directory exists and is only owned by the current user.
+ * Return 1 if so, 0 if not. This is used to check if XDG_RUNTIME_DIR can be
+ * used or not. According to the specification [1], XDG_RUNTIME_DIR should be
+ * ignored if the directory is not owned by the user with mode 0700.
+ * [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+ */
+static int checkruntimedir(const char *sockdir)
+{
+	struct stat st;
+	int r = lstat(sockdir, &st);
+	if (r < 0) /* ex. does not exist */
+		return 0;
+	if (!S_ISDIR(st.st_mode)) /* ex. is a file, not a directory */
+		return 0;
+	return st.st_uid == geteuid() && (st.st_mode & 0777) == 0700;
+}
+
 static void getdefaultsockdir(char sockdir[], size_t size)
 {
 	/* by default, put socket file in secure directory
@@ -135,7 +153,7 @@
 	 * (permission of socket file may be ignored on some Unices) */
 	const char *runtimedir = getenv("XDG_RUNTIME_DIR");
 	int r;
-	if (runtimedir) {
+	if (runtimedir && checkruntimedir(runtimedir)) {
 		r = snprintf(sockdir, size, "%s/chg", runtimedir);
 	} else {
 		const char *tmpdir = getenv("TMPDIR");
diff -r f3807a135e43 -r 455677a7667f contrib/perf.py
--- a/contrib/perf.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/perf.py	Mon Feb 13 09:44:16 2017 -0800
@@ -993,6 +993,26 @@
     node = r.lookup(rev)
     rev = r.rev(node)
 
+    def getrawchunks(data, chain):
+        start = r.start
+        length = r.length
+        inline = r._inline
+        iosize = r._io.size
+        buffer = util.buffer
+        offset = start(chain[0])
+
+        chunks = []
+        ladd = chunks.append
+
+        for rev in chain:
+            chunkstart = start(rev)
+            if inline:
+                chunkstart += (rev + 1) * iosize
+            chunklength = length(rev)
+            ladd(buffer(data, chunkstart - offset, chunklength))
+
+        return chunks
+
     def dodeltachain(rev):
         if not cache:
             r.clearcaches()
@@ -1003,24 +1023,15 @@
             r.clearcaches()
         r._chunkraw(chain[0], chain[-1])
 
-    def dodecompress(data, chain):
+    def dorawchunks(data, chain):
         if not cache:
             r.clearcaches()
-
-        start = r.start
-        length = r.length
-        inline = r._inline
-        iosize = r._io.size
-        buffer = util.buffer
-        offset = start(chain[0])
+        getrawchunks(data, chain)
 
-        for rev in chain:
-            chunkstart = start(rev)
-            if inline:
-                chunkstart += (rev + 1) * iosize
-            chunklength = length(rev)
-            b = buffer(data, chunkstart - offset, chunklength)
-            r.decompress(b)
+    def dodecompress(chunks):
+        decomp = r.decompress
+        for chunk in chunks:
+            decomp(chunk)
 
     def dopatch(text, bins):
         if not cache:
@@ -1039,6 +1050,7 @@
 
     chain = r._deltachain(rev)[0]
     data = r._chunkraw(chain[0], chain[-1])[1]
+    rawchunks = getrawchunks(data, chain)
     bins = r._chunks(chain)
     text = str(bins[0])
     bins = bins[1:]
@@ -1048,7 +1060,8 @@
         (lambda: dorevision(), 'full'),
         (lambda: dodeltachain(rev), 'deltachain'),
         (lambda: doread(chain), 'read'),
-        (lambda: dodecompress(data, chain), 'decompress'),
+        (lambda: dorawchunks(data, chain), 'rawchunks'),
+        (lambda: dodecompress(rawchunks), 'decompress'),
         (lambda: dopatch(text, bins), 'patch'),
         (lambda: dohash(text), 'hash'),
     ]
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/NEWS.rst
--- a/contrib/python-zstandard/NEWS.rst	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/NEWS.rst	Mon Feb 13 09:44:16 2017 -0800
@@ -1,6 +1,33 @@
 Version History
 ===============
 
+0.7.0 (released 2017-02-07)
+---------------------------
+
+* Added zstd.get_frame_parameters() to obtain info about a zstd frame.
+* Added ZstdDecompressor.decompress_content_dict_chain() for efficient
+  decompression of *content-only dictionary chains*.
+* CFFI module fully implemented; all tests run against both C extension and
+  CFFI implementation.
+* Vendored version of zstd updated to 1.1.3.
+* Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
+  to avoid extra memory allocation of dict data.
+* Add function names to error messages (by using ":name" in PyArg_Parse*
+  functions).
+* Reuse decompression context across operations. Previously, we created a
+  new ZSTD_DCtx for each decompress(). This was measured to slow down
+  decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
+  is not thread safe. So we reuse the ZSTD_DCtx across operations and make
+  things faster in the process.
+* ZstdCompressor.write_to()'s compress() and flush() methods now return number
+  of bytes written.
+* ZstdDecompressor.write_to()'s write() method now returns the number of bytes
+  written to the underlying output object.
+* CompressionParameters instances now expose their values as attributes.
+* CompressionParameters instances no longer are subscriptable nor behave
+  as tuples (backwards incompatible). Use attributes to obtain values.
+* DictParameters instances now expose their values as attributes.
+
 0.6.0 (released 2017-01-14)
 ---------------------------
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/README.rst
--- a/contrib/python-zstandard/README.rst	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/README.rst	Mon Feb 13 09:44:16 2017 -0800
@@ -4,10 +4,11 @@
 
 This project provides Python bindings for interfacing with the
 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
-and CFFI interface is provided.
+and CFFI interface are provided.
 
-The primary goal of the extension is to provide a Pythonic interface to
-the underlying C API. This means exposing most of the features and flexibility
+The primary goal of the project is to provide a rich interface to the
+underlying C API through a Pythonic interface while not sacrificing
+performance. This means exposing most of the features and flexibility
 of the C API while not sacrificing usability or safety that Python provides.
 
 The canonical home for this project is
@@ -23,6 +24,9 @@
 may be some backwards incompatible changes before 1.0. Though the author
 does not intend to make any major changes to the Python API.
 
+This project is vendored and distributed with Mercurial 4.1, where it is
+used in a production capacity.
+
 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
 confident the extension is stable and works as advertised on these
@@ -48,14 +52,15 @@
 support compression without the framing headers. But the author doesn't
 believe it a high priority at this time.
 
-The CFFI bindings are half-baked and need to be finished.
+The CFFI bindings are feature complete and all tests run against both
+the C extension and CFFI bindings to ensure behavior parity.
 
 Requirements
 ============
 
-This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and 3.5
-on common platforms (Linux, Windows, and OS X). Only x86_64 is currently
-well-tested as an architecture.
+This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
+3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
+currently well-tested as an architecture.
 
 Installing
 ==========
@@ -106,15 +111,11 @@
 Comparison to Other Python Bindings
 ===================================
 
-https://pypi.python.org/pypi/zstd is an alternative Python binding to
+https://pypi.python.org/pypi/zstd is an alternate Python binding to
 Zstandard. At the time this was written, the latest release of that
-package (1.0.0.2) had the following significant differences from this package:
-
-* It only exposes the simple API for compression and decompression operations.
-  This extension exposes the streaming API, dictionary training, and more.
-* It adds a custom framing header to compressed data and there is no way to
-  disable it. This means that data produced with that module cannot be used by
-  other Zstandard implementations.
+package (1.1.2) only exposed the simple APIs for compression and decompression.
+This package exposes much more of the zstd API, including streaming and
+dictionary compression. This package also has CFFI support.
 
 Bundling of Zstandard Source Code
 =================================
@@ -260,6 +261,10 @@
 compressor's internal state into the output object. This may result in 0 or
 more ``write()`` calls to the output object.
 
+Both ``write()`` and ``flush()`` return the number of bytes written to the
+object's ``write()``. In many cases, small inputs do not accumulate enough
+data to cause a write and ``write()`` will return ``0``.
+
 If the size of the data being fed to this streaming compressor is known,
 you can declare it before compression begins::
 
@@ -476,6 +481,10 @@
 the decompressor by calling ``write(data)`` and decompressed output is written
 to the output object by calling its ``write(data)`` method.
 
+Calls to ``write()`` will return the number of bytes written to the output
+object. Not all inputs will result in bytes being written, so return values
+of ``0`` are possible.
+
 The size of chunks being ``write()`` to the destination can be specified::
 
     dctx = zstd.ZstdDecompressor()
@@ -576,6 +585,53 @@
    data = dobj.decompress(compressed_chunk_0)
    data = dobj.decompress(compressed_chunk_1)
 
+Content-Only Dictionary Chain Decompression
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``decompress_content_dict_chain(frames)`` performs decompression of a list of
+zstd frames produced using chained *content-only* dictionary compression. Such
+a list of frames is produced by compressing discrete inputs where each
+non-initial input is compressed with a *content-only* dictionary consisting
+of the content of the previous input.
+
+For example, say you have the following inputs::
+
+   inputs = [b'input 1', b'input 2', b'input 3']
+
+The zstd frame chain consists of:
+
+1. ``b'input 1'`` compressed in standalone/discrete mode
+2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
+3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
+
+Each zstd frame **must** have the content size written.
+
+The following Python code can be used to produce a *content-only dictionary
+chain*::
+
+	def make_chain(inputs):
+	    frames = []
+
+		# First frame is compressed in standalone/discrete mode.
+		zctx = zstd.ZstdCompressor(write_content_size=True)
+		frames.append(zctx.compress(inputs[0]))
+
+		# Subsequent frames use the previous fulltext as a content-only dictionary
+		for i, raw in enumerate(inputs[1:]):
+		    dict_data = zstd.ZstdCompressionDict(inputs[i])
+			zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
+			frames.append(zctx.compress(raw))
+
+		return frames
+
+``decompress_content_dict_chain()`` returns the uncompressed data of the last
+element in the input chain.
+
+It is possible to implement *content-only dictionary chain* decompression
+on top of other Python APIs. However, this function will likely be significantly
+faster, especially for long input chains, as it avoids the overhead of
+instantiating and passing around intermediate objects between C and Python.
+
 Choosing an API
 ---------------
 
@@ -634,6 +690,13 @@
 
    dict_data = zstd.ZstdCompressionDict(data)
 
+It is possible to construct a dictionary from *any* data. Unless the
+data begins with a magic header, the dictionary will be treated as
+*content-only*. *Content-only* dictionaries allow compression operations
+that follow to reference raw data within the content. For one use of
+*content-only* dictionaries, see
+``ZstdDecompressor.decompress_content_dict_chain()``.
+
 More interestingly, instances can be created by *training* on sample data::
 
    dict_data = zstd.train_dictionary(size, samples)
@@ -700,19 +763,57 @@
 
     cctx = zstd.ZstdCompressor(compression_params=params)
 
-The members of the ``CompressionParameters`` tuple are as follows::
+The members/attributes of ``CompressionParameters`` instances are as follows::
 
-* 0 - Window log
-* 1 - Chain log
-* 2 - Hash log
-* 3 - Search log
-* 4 - Search length
-* 5 - Target length
-* 6 - Strategy (one of the ``zstd.STRATEGY_`` constants)
+* window_log
+* chain_log
+* hash_log
+* search_log
+* search_length
+* target_length
+* strategy
+
+This is the order the arguments are passed to the constructor if not using
+named arguments.
 
 You'll need to read the Zstandard documentation for what these parameters
 do.
 
+Frame Inspection
+----------------
+
+Data emitted from zstd compression is encapsulated in a *frame*. This frame
+begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
+the frame in more detail. For more info, see
+https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
+
+``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
+instance and return a ``FrameParameters`` object describing the frame.
+
+Depending on which fields are present in the frame and their values, the
+length of the frame parameters varies. If insufficient bytes are passed
+in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
+frame parameters can be parsed, pass in at least 18 bytes.
+
+``FrameParameters`` instances have the following attributes:
+
+content_size
+   Integer size of original, uncompressed content. This will be ``0`` if the
+   original content size isn't written to the frame (controlled with the
+   ``write_content_size`` argument to ``ZstdCompressor``) or if the input
+   content size was ``0``.
+
+window_size
+   Integer size of maximum back-reference distance in compressed data.
+
+dict_id
+   Integer of dictionary ID used for compression. ``0`` if no dictionary
+   ID was used or if the dictionary ID was ``0``.
+
+has_checksum
+   Bool indicating whether a 4 byte content checksum is stored at the end
+   of the frame.
+
 Misc Functionality
 ------------------
 
@@ -776,19 +877,32 @@
 TARGETLENGTH_MAX
     Maximum value for compression parameter
 STRATEGY_FAST
-    Compression strategory
+    Compression strategy
 STRATEGY_DFAST
-    Compression strategory
+    Compression strategy
 STRATEGY_GREEDY
-    Compression strategory
+    Compression strategy
 STRATEGY_LAZY
-    Compression strategory
+    Compression strategy
 STRATEGY_LAZY2
-    Compression strategory
+    Compression strategy
 STRATEGY_BTLAZY2
-    Compression strategory
+    Compression strategy
 STRATEGY_BTOPT
-    Compression strategory
+    Compression strategy
+
+Performance Considerations
+--------------------------
+
+The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
+persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
+or ``ZstdDecompressor`` instance for multiple operations is faster than
+instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
+operation. The differences are magnified as the size of data decreases. For
+example, the difference between *context* reuse and non-reuse for 100,000
+100 byte inputs will be significant (possiby over 10x faster to reuse contexts)
+whereas 10 1,000,000 byte inputs will be more similar in speed (because the
+time spent doing compression dwarfs time spent creating new *contexts*).
 
 Note on Zstandard's *Experimental* API
 ======================================
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/compressiondict.c
--- a/contrib/python-zstandard/c-ext/compressiondict.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/compressiondict.c	Mon Feb 13 09:44:16 2017 -0800
@@ -28,7 +28,8 @@
 	void* dict;
 	ZstdCompressionDict* result;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
+		kwlist,
 		&capacity,
 		&PyList_Type, &samples,
 		(PyObject*)&DictParametersType, &parameters)) {
@@ -57,7 +58,6 @@
 		sampleItem = PyList_GetItem(samples, sampleIndex);
 		if (!PyBytes_Check(sampleItem)) {
 			PyErr_SetString(PyExc_ValueError, "samples must be bytes");
-			/* TODO probably need to perform DECREF here */
 			return NULL;
 		}
 		samplesSize += PyBytes_GET_SIZE(sampleItem);
@@ -133,10 +133,11 @@
 	self->dictSize = 0;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
 #else
-	if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
 #endif
+		&source, &sourceSize)) {
 		return -1;
 	}
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/compressionparams.c
--- a/contrib/python-zstandard/c-ext/compressionparams.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/compressionparams.c	Mon Feb 13 09:44:16 2017 -0800
@@ -25,7 +25,8 @@
 	ZSTD_compressionParameters params;
 	CompressionParametersObject* result;
 
-	if (!PyArg_ParseTuple(args, "i|Kn", &compressionLevel, &sourceSize, &dictSize)) {
+	if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
+		&compressionLevel, &sourceSize, &dictSize)) {
 		return NULL;
 	}
 
@@ -47,12 +48,85 @@
 	return result;
 }
 
+static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = {
+		"window_log",
+		"chain_log",
+		"hash_log",
+		"search_log",
+		"search_length",
+		"target_length",
+		"strategy",
+		NULL
+	};
+
+	unsigned windowLog;
+	unsigned chainLog;
+	unsigned hashLog;
+	unsigned searchLog;
+	unsigned searchLength;
+	unsigned targetLength;
+	unsigned strategy;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
+		kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
+		&targetLength, &strategy)) {
+		return -1;
+	}
+
+	if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid window log value");
+		return -1;
+	}
+
+	if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid chain log value");
+		return -1;
+	}
+
+	if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid hash log value");
+		return -1;
+	}
+
+	if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid search log value");
+		return -1;
+	}
+
+	if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid search length value");
+		return -1;
+	}
+
+	if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
+		PyErr_SetString(PyExc_ValueError, "invalid target length value");
+		return -1;
+	}
+
+	if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
+		PyErr_SetString(PyExc_ValueError, "invalid strategy value");
+		return -1;
+	}
+
+	self->windowLog = windowLog;
+	self->chainLog = chainLog;
+	self->hashLog = hashLog;
+	self->searchLog = searchLog;
+	self->searchLength = searchLength;
+	self->targetLength = targetLength;
+	self->strategy = strategy;
+
+	return 0;
+}
+
 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
 	CompressionParametersObject* params;
 	ZSTD_compressionParameters zparams;
 	PyObject* result;
 
-	if (!PyArg_ParseTuple(args, "O!", &CompressionParametersType, &params)) {
+	if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
+		&CompressionParametersType, &params)) {
 		return NULL;
 	}
 
@@ -64,113 +138,33 @@
 PyDoc_STRVAR(CompressionParameters__doc__,
 "CompressionParameters: low-level control over zstd compression");
 
-static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
-	CompressionParametersObject* self;
-	unsigned windowLog;
-	unsigned chainLog;
-	unsigned hashLog;
-	unsigned searchLog;
-	unsigned searchLength;
-	unsigned targetLength;
-	unsigned strategy;
-
-	if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog,
-		&searchLength, &targetLength, &strategy)) {
-		return NULL;
-	}
-
-	if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid window log value");
-		return NULL;
-	}
-
-	if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid chain log value");
-		return NULL;
-	}
-
-	if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid hash log value");
-		return NULL;
-	}
-
-	if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid search log value");
-		return NULL;
-	}
-
-	if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid search length value");
-		return NULL;
-	}
-
-	if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
-		PyErr_SetString(PyExc_ValueError, "invalid target length value");
-		return NULL;
-	}
-
-	if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
-		PyErr_SetString(PyExc_ValueError, "invalid strategy value");
-		return NULL;
-	}
-
-	self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1);
-	if (!self) {
-		return NULL;
-	}
-
-	self->windowLog = windowLog;
-	self->chainLog = chainLog;
-	self->hashLog = hashLog;
-	self->searchLog = searchLog;
-	self->searchLength = searchLength;
-	self->targetLength = targetLength;
-	self->strategy = strategy;
-
-	return (PyObject*)self;
-}
-
 static void CompressionParameters_dealloc(PyObject* self) {
 	PyObject_Del(self);
 }
 
-static Py_ssize_t CompressionParameters_length(PyObject* self) {
-	return 7;
-}
-
-static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) {
-	CompressionParametersObject* self = (CompressionParametersObject*)o;
-
-	switch (i) {
-	case 0:
-		return PyLong_FromLong(self->windowLog);
-	case 1:
-		return PyLong_FromLong(self->chainLog);
-	case 2:
-		return PyLong_FromLong(self->hashLog);
-	case 3:
-		return PyLong_FromLong(self->searchLog);
-	case 4:
-		return PyLong_FromLong(self->searchLength);
-	case 5:
-		return PyLong_FromLong(self->targetLength);
-	case 6:
-		return PyLong_FromLong(self->strategy);
-	default:
-		PyErr_SetString(PyExc_IndexError, "index out of range");
-		return NULL;
-	}
-}
-
-static PySequenceMethods CompressionParameters_sq = {
-	CompressionParameters_length, /* sq_length */
-	0,							  /* sq_concat */
-	0,                            /* sq_repeat */
-	CompressionParameters_item,   /* sq_item */
-	0,                            /* sq_ass_item */
-	0,                            /* sq_contains */
-	0,                            /* sq_inplace_concat */
-	0                             /* sq_inplace_repeat */
+static PyMemberDef CompressionParameters_members[] = {
+	{ "window_log", T_UINT,
+	  offsetof(CompressionParametersObject, windowLog), READONLY,
+	  "window log" },
+	{ "chain_log", T_UINT,
+	  offsetof(CompressionParametersObject, chainLog), READONLY,
+	  "chain log" },
+	{ "hash_log", T_UINT,
+	  offsetof(CompressionParametersObject, hashLog), READONLY,
+	  "hash log" },
+	{ "search_log", T_UINT,
+	  offsetof(CompressionParametersObject, searchLog), READONLY,
+	  "search log" },
+	{ "search_length", T_UINT,
+	  offsetof(CompressionParametersObject, searchLength), READONLY,
+	  "search length" },
+	{ "target_length", T_UINT,
+	  offsetof(CompressionParametersObject, targetLength), READONLY,
+	  "target length" },
+	{ "strategy", T_INT,
+	  offsetof(CompressionParametersObject, strategy), READONLY,
+	  "strategy" },
+	{ NULL }
 };
 
 PyTypeObject CompressionParametersType = {
@@ -185,7 +179,7 @@
 	0,                         /* tp_compare */
 	0,                         /* tp_repr */
 	0,                         /* tp_as_number */
-	&CompressionParameters_sq, /* tp_as_sequence */
+	0,                         /* tp_as_sequence */
 	0,                         /* tp_as_mapping */
 	0,                         /* tp_hash  */
 	0,                         /* tp_call */
@@ -193,7 +187,7 @@
 	0,                         /* tp_getattro */
 	0,                         /* tp_setattro */
 	0,                         /* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT,        /* tp_flags */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 	CompressionParameters__doc__, /* tp_doc */
 	0,                         /* tp_traverse */
 	0,                         /* tp_clear */
@@ -202,16 +196,16 @@
 	0,                         /* tp_iter */
 	0,                         /* tp_iternext */
 	0,                         /* tp_methods */
-	0,                         /* tp_members */
+	CompressionParameters_members, /* tp_members */
 	0,                         /* tp_getset */
 	0,                         /* tp_base */
 	0,                         /* tp_dict */
 	0,                         /* tp_descr_get */
 	0,                         /* tp_descr_set */
 	0,                         /* tp_dictoffset */
-	0,                         /* tp_init */
+	(initproc)CompressionParameters_init, /* tp_init */
 	0,                         /* tp_alloc */
-	CompressionParameters_new, /* tp_new */
+	PyType_GenericNew,         /* tp_new */
 };
 
 void compressionparams_module_init(PyObject* mod) {
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/compressionwriter.c
--- a/contrib/python-zstandard/c-ext/compressionwriter.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/compressionwriter.c	Mon Feb 13 09:44:16 2017 -0800
@@ -52,7 +52,7 @@
 	ZSTD_outBuffer output;
 	PyObject* res;
 
-	if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) {
+	if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
 		return NULL;
 	}
 
@@ -119,11 +119,12 @@
 	ZSTD_inBuffer input;
 	ZSTD_outBuffer output;
 	PyObject* res;
+	Py_ssize_t totalWrite = 0;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
 #else
-	if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
 #endif
 		return NULL;
 	}
@@ -164,20 +165,21 @@
 #endif
 				output.dst, output.pos);
 			Py_XDECREF(res);
+			totalWrite += output.pos;
 		}
 		output.pos = 0;
 	}
 
 	PyMem_Free(output.dst);
 
-	/* TODO return bytes written */
-	Py_RETURN_NONE;
+	return PyLong_FromSsize_t(totalWrite);
 }
 
 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
 	size_t zresult;
 	ZSTD_outBuffer output;
 	PyObject* res;
+	Py_ssize_t totalWrite = 0;
 
 	if (!self->entered) {
 		PyErr_SetString(ZstdError, "flush must be called from an active context manager");
@@ -215,14 +217,14 @@
 #endif
 				output.dst, output.pos);
 			Py_XDECREF(res);
+			totalWrite += output.pos;
 		}
 		output.pos = 0;
 	}
 
 	PyMem_Free(output.dst);
 
-	/* TODO return bytes written */
-	Py_RETURN_NONE;
+	return PyLong_FromSsize_t(totalWrite);
 }
 
 static PyMethodDef ZstdCompressionWriter_methods[] = {
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/compressobj.c
--- a/contrib/python-zstandard/c-ext/compressobj.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/compressobj.c	Mon Feb 13 09:44:16 2017 -0800
@@ -42,9 +42,9 @@
 	}
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
 #else
-	if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
 #endif
 		return NULL;
 	}
@@ -98,7 +98,7 @@
 	PyObject* result = NULL;
 	Py_ssize_t resultSize = 0;
 
-	if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
+	if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
 		return NULL;
 	}
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/compressor.c
--- a/contrib/python-zstandard/c-ext/compressor.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/compressor.c	Mon Feb 13 09:44:16 2017 -0800
@@ -16,7 +16,7 @@
 	Py_BEGIN_ALLOW_THREADS
 	memset(&zmem, 0, sizeof(zmem));
 	compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
-		compressor->dict->dictSize, *zparams, zmem);
+		compressor->dict->dictSize, 1, *zparams, zmem);
 	Py_END_ALLOW_THREADS
 
 	if (!compressor->cdict) {
@@ -128,8 +128,8 @@
 	self->cparams = NULL;
 	self->cdict = NULL;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
-		&level, &ZstdCompressionDictType, &dict,
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor",
+		kwlist,	&level, &ZstdCompressionDictType, &dict,
 		&CompressionParametersType, &params,
 		&writeChecksum, &writeContentSize, &writeDictID)) {
 		return -1;
@@ -243,8 +243,8 @@
 	PyObject* totalReadPy;
 	PyObject* totalWritePy;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, &source, &dest, &sourceSize,
-		&inSize, &outSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
+		&source, &dest, &sourceSize, &inSize, &outSize)) {
 		return NULL;
 	}
 
@@ -402,9 +402,9 @@
 	ZSTD_parameters zparams;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
 #else
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
 #endif
 		kwlist, &source, &sourceSize, &allowEmpty)) {
 		return NULL;
@@ -512,7 +512,7 @@
 		return NULL;
 	}
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
 		return NULL;
 	}
 
@@ -574,8 +574,8 @@
 	size_t outSize = ZSTD_CStreamOutSize();
 	ZstdCompressorIterator* result;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, &reader, &sourceSize,
-		&inSize, &outSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
+		&reader, &sourceSize, &inSize, &outSize)) {
 		return NULL;
 	}
 
@@ -693,8 +693,8 @@
 	Py_ssize_t sourceSize = 0;
 	size_t outSize = ZSTD_CStreamOutSize();
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, &writer, &sourceSize,
-		&outSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
+		&writer, &sourceSize, &outSize)) {
 		return NULL;
 	}
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/decompressionwriter.c
--- a/contrib/python-zstandard/c-ext/decompressionwriter.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/decompressionwriter.c	Mon Feb 13 09:44:16 2017 -0800
@@ -71,11 +71,12 @@
 	ZSTD_inBuffer input;
 	ZSTD_outBuffer output;
 	PyObject* res;
+	Py_ssize_t totalWrite = 0;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
 #else
-	if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+	if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
 #endif
 		return NULL;
 	}
@@ -116,15 +117,15 @@
 #endif
 				output.dst, output.pos);
 			Py_XDECREF(res);
+			totalWrite += output.pos;
 			output.pos = 0;
 		}
 	}
 
 	PyMem_Free(output.dst);
 
-	/* TODO return bytes written */
-	Py_RETURN_NONE;
-	}
+	return PyLong_FromSsize_t(totalWrite);
+}
 
 static PyMethodDef ZstdDecompressionWriter_methods[] = {
 	{ "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/decompressobj.c
--- a/contrib/python-zstandard/c-ext/decompressobj.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/decompressobj.c	Mon Feb 13 09:44:16 2017 -0800
@@ -41,9 +41,9 @@
 	}
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#",
+	if (!PyArg_ParseTuple(args, "y#:decompress",
 #else
-	if (!PyArg_ParseTuple(args, "s#",
+	if (!PyArg_ParseTuple(args, "s#:decompress",
 #endif
 		&source, &sourceSize)) {
 		return NULL;
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/decompressor.c
--- a/contrib/python-zstandard/c-ext/decompressor.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/decompressor.c	Mon Feb 13 09:44:16 2017 -0800
@@ -59,23 +59,19 @@
 
 	ZstdCompressionDict* dict = NULL;
 
-	self->refdctx = NULL;
+	self->dctx = NULL;
 	self->dict = NULL;
 	self->ddict = NULL;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist,
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
 		&ZstdCompressionDictType, &dict)) {
 		return -1;
 	}
 
-	/* Instead of creating a ZSTD_DCtx for every decompression operation,
-	   we create an instance at object creation time and recycle it via
-	   ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy
-	   instead of a malloc+init. */
 	/* TODO lazily initialize the reference ZSTD_DCtx on first use since
 	   not instances of ZstdDecompressor will use a ZSTD_DCtx. */
-	self->refdctx = ZSTD_createDCtx();
-	if (!self->refdctx) {
+	self->dctx = ZSTD_createDCtx();
+	if (!self->dctx) {
 		PyErr_NoMemory();
 		goto except;
 	}
@@ -88,17 +84,17 @@
 	return 0;
 
 except:
-	if (self->refdctx) {
-		ZSTD_freeDCtx(self->refdctx);
-		self->refdctx = NULL;
+	if (self->dctx) {
+		ZSTD_freeDCtx(self->dctx);
+		self->dctx = NULL;
 	}
 
 	return -1;
 }
 
 static void Decompressor_dealloc(ZstdDecompressor* self) {
-	if (self->refdctx) {
-		ZSTD_freeDCtx(self->refdctx);
+	if (self->dctx) {
+		ZSTD_freeDCtx(self->dctx);
 	}
 
 	Py_XDECREF(self->dict);
@@ -150,8 +146,8 @@
 	PyObject* totalReadPy;
 	PyObject* totalWritePy;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, &source,
-		&dest, &inSize, &outSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
+		&source, &dest, &inSize, &outSize)) {
 		return NULL;
 	}
 
@@ -243,7 +239,7 @@
 	Py_DecRef(totalReadPy);
 	Py_DecRef(totalWritePy);
 
-	finally:
+finally:
 	if (output.dst) {
 		PyMem_Free(output.dst);
 	}
@@ -291,28 +287,19 @@
 	unsigned long long decompressedSize;
 	size_t destCapacity;
 	PyObject* result = NULL;
-	ZSTD_DCtx* dctx = NULL;
 	void* dictData = NULL;
 	size_t dictSize = 0;
 	size_t zresult;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist,
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
 #else
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist,
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
 #endif
-		&source, &sourceSize, &maxOutputSize)) {
+		kwlist, &source, &sourceSize, &maxOutputSize)) {
 		return NULL;
 	}
 
-	dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx));
-	if (!dctx) {
-		PyErr_NoMemory();
-		return NULL;
-	}
-
-	ZSTD_copyDCtx(dctx, self->refdctx);
-
 	if (self->dict) {
 		dictData = self->dict->dictData;
 		dictSize = self->dict->dictSize;
@@ -320,12 +307,12 @@
 
 	if (dictData && !self->ddict) {
 		Py_BEGIN_ALLOW_THREADS
-		self->ddict = ZSTD_createDDict(dictData, dictSize);
+		self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
 		Py_END_ALLOW_THREADS
 
 		if (!self->ddict) {
 			PyErr_SetString(ZstdError, "could not create decompression dict");
-			goto except;
+			return NULL;
 		}
 	}
 
@@ -335,7 +322,7 @@
 		if (0 == maxOutputSize) {
 			PyErr_SetString(ZstdError, "input data invalid or missing content size "
 				"in frame header");
-			goto except;
+			return NULL;
 		}
 		else {
 			result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
@@ -348,45 +335,39 @@
 	}
 
 	if (!result) {
-		goto except;
+		return NULL;
 	}
 
 	Py_BEGIN_ALLOW_THREADS
 	if (self->ddict) {
-		zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity,
+		zresult = ZSTD_decompress_usingDDict(self->dctx,
+			PyBytes_AsString(result), destCapacity,
 			source, sourceSize, self->ddict);
 	}
 	else {
-		zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize);
+		zresult = ZSTD_decompressDCtx(self->dctx,
+			PyBytes_AsString(result), destCapacity, source, sourceSize);
 	}
 	Py_END_ALLOW_THREADS
 
 	if (ZSTD_isError(zresult)) {
 		PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
-		goto except;
+		Py_DecRef(result);
+		return NULL;
 	}
 	else if (decompressedSize && zresult != decompressedSize) {
 		PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
 			zresult, decompressedSize);
-		goto except;
+		Py_DecRef(result);
+		return NULL;
 	}
 	else if (zresult < destCapacity) {
 		if (_PyBytes_Resize(&result, zresult)) {
-			goto except;
+			Py_DecRef(result);
+			return NULL;
 		}
 	}
 
-	goto finally;
-
-except:
-	Py_DecRef(result);
-	result = NULL;
-
-finally:
-	if (dctx) {
-		PyMem_FREE(dctx);
-	}
-
 	return result;
 }
 
@@ -455,8 +436,8 @@
 	ZstdDecompressorIterator* result;
 	size_t skipBytes = 0;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, &reader,
-		&inSize, &outSize, &skipBytes)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
+		&reader, &inSize, &outSize, &skipBytes)) {
 		return NULL;
 	}
 
@@ -534,19 +515,14 @@
 	goto finally;
 
 except:
-	if (result->reader) {
-		Py_DECREF(result->reader);
-		result->reader = NULL;
-	}
+	Py_CLEAR(result->reader);
 
 	if (result->buffer) {
 		PyBuffer_Release(result->buffer);
-		Py_DECREF(result->buffer);
-		result->buffer = NULL;
+		Py_CLEAR(result->buffer);
 	}
 
-	Py_DECREF(result);
-	result = NULL;
+	Py_CLEAR(result);
 
 finally:
 
@@ -577,7 +553,8 @@
 	size_t outSize = ZSTD_DStreamOutSize();
 	ZstdDecompressionWriter* result;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, &writer, &outSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
+		&writer, &outSize)) {
 		return NULL;
 	}
 
@@ -605,6 +582,200 @@
 	return result;
 }
 
+PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
+"Decompress a series of chunks using the content dictionary chaining technique\n"
+);
+
+static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = {
+		"frames",
+		NULL
+	};
+
+	PyObject* chunks;
+	Py_ssize_t chunksLen;
+	Py_ssize_t chunkIndex;
+	char parity = 0;
+	PyObject* chunk;
+	char* chunkData;
+	Py_ssize_t chunkSize;
+	ZSTD_DCtx* dctx = NULL;
+	size_t zresult;
+	ZSTD_frameParams frameParams;
+	void* buffer1 = NULL;
+	size_t buffer1Size = 0;
+	size_t buffer1ContentSize = 0;
+	void* buffer2 = NULL;
+	size_t buffer2Size = 0;
+	size_t buffer2ContentSize = 0;
+	void* destBuffer = NULL;
+	PyObject* result = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
+		kwlist, &PyList_Type, &chunks)) {
+		return NULL;
+	}
+
+	chunksLen = PyList_Size(chunks);
+	if (!chunksLen) {
+		PyErr_SetString(PyExc_ValueError, "empty input chain");
+		return NULL;
+	}
+
+	/* The first chunk should not be using a dictionary. We handle it specially. */
+	chunk = PyList_GetItem(chunks, 0);
+	if (!PyBytes_Check(chunk)) {
+		PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
+		return NULL;
+	}
+
+	/* We require that all chunks be zstd frames and that they have content size set. */
+	PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
+	zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
+	if (ZSTD_isError(zresult)) {
+		PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
+		return NULL;
+	}
+	else if (zresult) {
+		PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
+		return NULL;
+	}
+
+	if (0 == frameParams.frameContentSize) {
+		PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
+		return NULL;
+	}
+
+	dctx = ZSTD_createDCtx();
+	if (!dctx) {
+		PyErr_NoMemory();
+		goto finally;
+	}
+
+	buffer1Size = frameParams.frameContentSize;
+	buffer1 = PyMem_Malloc(buffer1Size);
+	if (!buffer1) {
+		goto finally;
+	}
+
+	Py_BEGIN_ALLOW_THREADS
+	zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
+	Py_END_ALLOW_THREADS
+	if (ZSTD_isError(zresult)) {
+		PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
+		goto finally;
+	}
+
+	buffer1ContentSize = zresult;
+
+	/* Special case of a simple chain. */
+	if (1 == chunksLen) {
+		result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
+		goto finally;
+	}
+
+	/* This should ideally look at next chunk. But this is slightly simpler. */
+	buffer2Size = frameParams.frameContentSize;
+	buffer2 = PyMem_Malloc(buffer2Size);
+	if (!buffer2) {
+		goto finally;
+	}
+
+	/* For each subsequent chunk, use the previous fulltext as a content dictionary.
+	   Our strategy is to have 2 buffers. One holds the previous fulltext (to be
+	   used as a content dictionary) and the other holds the new fulltext. The
+	   buffers grow when needed but never decrease in size. This limits the
+	   memory allocator overhead.
+	*/
+	for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
+		chunk = PyList_GetItem(chunks, chunkIndex);
+		if (!PyBytes_Check(chunk)) {
+			PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
+			goto finally;
+		}
+
+		PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
+		zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
+		if (ZSTD_isError(zresult)) {
+			PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
+			goto finally;
+		}
+		else if (zresult) {
+			PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
+			goto finally;
+		}
+
+		if (0 == frameParams.frameContentSize) {
+			PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
+			goto finally;
+		}
+
+		parity = chunkIndex % 2;
+
+		/* This could definitely be abstracted to reduce code duplication. */
+		if (parity) {
+			/* Resize destination buffer to hold larger content. */
+			if (buffer2Size < frameParams.frameContentSize) {
+				buffer2Size = frameParams.frameContentSize;
+				destBuffer = PyMem_Realloc(buffer2, buffer2Size);
+				if (!destBuffer) {
+					goto finally;
+				}
+				buffer2 = destBuffer;
+			}
+
+			Py_BEGIN_ALLOW_THREADS
+			zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
+				chunkData, chunkSize, buffer1, buffer1ContentSize);
+			Py_END_ALLOW_THREADS
+			if (ZSTD_isError(zresult)) {
+				PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
+					chunkIndex, ZSTD_getErrorName(zresult));
+				goto finally;
+			}
+			buffer2ContentSize = zresult;
+		}
+		else {
+			if (buffer1Size < frameParams.frameContentSize) {
+				buffer1Size = frameParams.frameContentSize;
+				destBuffer = PyMem_Realloc(buffer1, buffer1Size);
+				if (!destBuffer) {
+					goto finally;
+				}
+				buffer1 = destBuffer;
+			}
+
+			Py_BEGIN_ALLOW_THREADS
+			zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
+				chunkData, chunkSize, buffer2, buffer2ContentSize);
+			Py_END_ALLOW_THREADS
+			if (ZSTD_isError(zresult)) {
+				PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
+					chunkIndex, ZSTD_getErrorName(zresult));
+				goto finally;
+			}
+			buffer1ContentSize = zresult;
+		}
+	}
+
+	result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
+		parity ? buffer2ContentSize : buffer1ContentSize);
+
+finally:
+	if (buffer2) {
+		PyMem_Free(buffer2);
+	}
+	if (buffer1) {
+		PyMem_Free(buffer1);
+	}
+
+	if (dctx) {
+		ZSTD_freeDCtx(dctx);
+	}
+
+	return result;
+}
+
 static PyMethodDef Decompressor_methods[] = {
 	{ "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
 	Decompressor_copy_stream__doc__ },
@@ -616,6 +787,8 @@
 	Decompressor_read_from__doc__ },
 	{ "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
 	Decompressor_write_to__doc__ },
+	{ "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
+	  METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
 	{ NULL, NULL }
 };
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/dictparams.c
--- a/contrib/python-zstandard/c-ext/dictparams.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/dictparams.c	Mon Feb 13 09:44:16 2017 -0800
@@ -18,8 +18,8 @@
 	unsigned notificationLevel;
 	unsigned dictID;
 
-	if (!PyArg_ParseTuple(args, "IiII", &selectivityLevel, &compressionLevel,
-		&notificationLevel, &dictID)) {
+	if (!PyArg_ParseTuple(args, "IiII:DictParameters",
+		&selectivityLevel, &compressionLevel, &notificationLevel, &dictID)) {
 		return NULL;
 	}
 
@@ -40,6 +40,22 @@
 	PyObject_Del(self);
 }
 
+static PyMemberDef DictParameters_members[] = {
+	{ "selectivity_level", T_UINT,
+	  offsetof(DictParametersObject, selectivityLevel), READONLY,
+	  "selectivity level" },
+	{ "compression_level", T_INT,
+	  offsetof(DictParametersObject, compressionLevel), READONLY,
+	  "compression level" },
+	{ "notification_level", T_UINT,
+	  offsetof(DictParametersObject, notificationLevel), READONLY,
+	  "notification level" },
+	{ "dict_id", T_UINT,
+	  offsetof(DictParametersObject, dictID), READONLY,
+	  "dictionary ID" },
+	{ NULL }
+};
+
 static Py_ssize_t DictParameters_length(PyObject* self) {
 	return 4;
 }
@@ -102,7 +118,7 @@
 	0,                         /* tp_iter */
 	0,                         /* tp_iternext */
 	0,                         /* tp_methods */
-	0,                         /* tp_members */
+	DictParameters_members,    /* tp_members */
 	0,                         /* tp_getset */
 	0,                         /* tp_base */
 	0,                         /* tp_dict */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/frameparams.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/c-ext/frameparams.c	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,132 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(FrameParameters__doc__,
+	"FrameParameters: information about a zstd frame");
+
+FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
+	const char* source;
+	Py_ssize_t sourceSize;
+	ZSTD_frameParams params;
+	FrameParametersObject* result = NULL;
+	size_t zresult;
+
+#if PY_MAJOR_VERSION >= 3
+	if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
+#else
+	if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
+#endif
+		&source, &sourceSize)) {
+		return NULL;
+	}
+
+	/* Needed for Python 2 to reject unicode */
+	if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
+		PyErr_SetString(PyExc_TypeError, "argument must be bytes");
+		return NULL;
+	}
+
+	zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
+
+	if (ZSTD_isError(zresult)) {
+		PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
+		return NULL;
+	}
+
+	if (zresult) {
+		PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
+		return NULL;
+	}
+
+	result = PyObject_New(FrameParametersObject, &FrameParametersType);
+	if (!result) {
+		return NULL;
+	}
+
+	result->frameContentSize = params.frameContentSize;
+	result->windowSize = params.windowSize;
+	result->dictID = params.dictID;
+	result->checksumFlag = params.checksumFlag ? 1 : 0;
+
+	return result;
+}
+
+static void FrameParameters_dealloc(PyObject* self) {
+	PyObject_Del(self);
+}
+
+static PyMemberDef FrameParameters_members[] = {
+	{ "content_size", T_ULONGLONG,
+	  offsetof(FrameParametersObject, frameContentSize), READONLY,
+	  "frame content size" },
+	{ "window_size", T_UINT,
+	  offsetof(FrameParametersObject, windowSize), READONLY,
+	  "window size" },
+	{ "dict_id", T_UINT,
+	  offsetof(FrameParametersObject, dictID), READONLY,
+	  "dictionary ID" },
+	{ "has_checksum", T_BOOL,
+	  offsetof(FrameParametersObject, checksumFlag), READONLY,
+	  "checksum flag" },
+	{ NULL }
+};
+
+PyTypeObject FrameParametersType = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"FrameParameters",          /* tp_name */
+	sizeof(FrameParametersObject), /* tp_basicsize */
+	0,                         /* tp_itemsize */
+	(destructor)FrameParameters_dealloc, /* tp_dealloc */
+	0,                         /* tp_print */
+	0,                         /* tp_getattr */
+	0,                         /* tp_setattr */
+	0,                         /* tp_compare */
+	0,                         /* tp_repr */
+	0,                         /* tp_as_number */
+	0,                         /* tp_as_sequence */
+	0,                         /* tp_as_mapping */
+	0,                         /* tp_hash  */
+	0,                         /* tp_call */
+	0,                         /* tp_str */
+	0,                         /* tp_getattro */
+	0,                         /* tp_setattro */
+	0,                         /* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,        /* tp_flags */
+	FrameParameters__doc__,    /* tp_doc */
+	0,                         /* tp_traverse */
+	0,                         /* tp_clear */
+	0,                         /* tp_richcompare */
+	0,                         /* tp_weaklistoffset */
+	0,                         /* tp_iter */
+	0,                         /* tp_iternext */
+	0,                         /* tp_methods */
+	FrameParameters_members,   /* tp_members */
+	0,                         /* tp_getset */
+	0,                         /* tp_base */
+	0,                         /* tp_dict */
+	0,                         /* tp_descr_get */
+	0,                         /* tp_descr_set */
+	0,                         /* tp_dictoffset */
+	0,                         /* tp_init */
+	0,                         /* tp_alloc */
+	0,                         /* tp_new */
+};
+
+void frameparams_module_init(PyObject* mod) {
+	Py_TYPE(&FrameParametersType) = &PyType_Type;
+	if (PyType_Ready(&FrameParametersType) < 0) {
+		return;
+	}
+
+	Py_IncRef((PyObject*)&FrameParametersType);
+	PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
+}
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/c-ext/python-zstandard.h
--- a/contrib/python-zstandard/c-ext/python-zstandard.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/c-ext/python-zstandard.h	Mon Feb 13 09:44:16 2017 -0800
@@ -8,6 +8,7 @@
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
+#include "structmember.h"
 
 #define ZSTD_STATIC_LINKING_ONLY
 #define ZDICT_STATIC_LINKING_ONLY
@@ -15,7 +16,7 @@
 #include "zstd.h"
 #include "zdict.h"
 
-#define PYTHON_ZSTANDARD_VERSION "0.6.0"
+#define PYTHON_ZSTANDARD_VERSION "0.7.0"
 
 typedef enum {
 	compressorobj_flush_finish,
@@ -37,6 +38,16 @@
 
 typedef struct {
 	PyObject_HEAD
+	unsigned long long frameContentSize;
+	unsigned windowSize;
+	unsigned dictID;
+	char checksumFlag;
+} FrameParametersObject;
+
+extern PyTypeObject FrameParametersType;
+
+typedef struct {
+	PyObject_HEAD
 	unsigned selectivityLevel;
 	int compressionLevel;
 	unsigned notificationLevel;
@@ -115,7 +126,7 @@
 typedef struct {
 	PyObject_HEAD
 
-	ZSTD_DCtx* refdctx;
+	ZSTD_DCtx* dctx;
 
 	ZstdCompressionDict* dict;
 	ZSTD_DDict* ddict;
@@ -172,6 +183,7 @@
 
 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
+FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/make_cffi.py
--- a/contrib/python-zstandard/make_cffi.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/make_cffi.py	Mon Feb 13 09:44:16 2017 -0800
@@ -9,6 +9,7 @@
 import cffi
 import distutils.ccompiler
 import os
+import re
 import subprocess
 import tempfile
 
@@ -19,6 +20,8 @@
     'common/entropy_common.c',
     'common/error_private.c',
     'common/fse_decompress.c',
+    'common/pool.c',
+    'common/threading.c',
     'common/xxhash.c',
     'common/zstd_common.c',
     'compress/fse_compress.c',
@@ -26,10 +29,17 @@
     'compress/zstd_compress.c',
     'decompress/huf_decompress.c',
     'decompress/zstd_decompress.c',
+    'dictBuilder/cover.c',
     'dictBuilder/divsufsort.c',
     'dictBuilder/zdict.c',
 )]
 
+HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
+    ('zstd.h',),
+    ('common', 'pool.h'),
+    ('dictBuilder', 'zdict.h'),
+)]
+
 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
     'zstd',
     'zstd/common',
@@ -53,56 +63,92 @@
     args.extend([
         '-E',
         '-DZSTD_STATIC_LINKING_ONLY',
+        '-DZDICT_STATIC_LINKING_ONLY',
     ])
 elif compiler.compiler_type == 'msvc':
     args = [compiler.cc]
     args.extend([
         '/EP',
         '/DZSTD_STATIC_LINKING_ONLY',
+        '/DZDICT_STATIC_LINKING_ONLY',
     ])
 else:
     raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
 
-# zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
-# This can lead to duplicate declarations. So we strip this include from the
-# preprocessor invocation.
+def preprocess(path):
+    # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
+    # This can lead to duplicate declarations. So we strip this include from the
+    # preprocessor invocation.
+    with open(path, 'rb') as fh:
+        lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
 
-with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
-    lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
-
-fd, input_file = tempfile.mkstemp(suffix='.h')
-os.write(fd, b''.join(lines))
-os.close(fd)
+    fd, input_file = tempfile.mkstemp(suffix='.h')
+    os.write(fd, b''.join(lines))
+    os.close(fd)
 
-args.append(input_file)
+    try:
+        process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
+        output = process.communicate()[0]
+        ret = process.poll()
+        if ret:
+            raise Exception('preprocessor exited with error')
 
-try:
-    process = subprocess.Popen(args, stdout=subprocess.PIPE)
-    output = process.communicate()[0]
-    ret = process.poll()
-    if ret:
-        raise Exception('preprocessor exited with error')
-finally:
-    os.unlink(input_file)
+        return output
+    finally:
+        os.unlink(input_file)
 
-def normalize_output():
+
+def normalize_output(output):
     lines = []
     for line in output.splitlines():
         # CFFI's parser doesn't like __attribute__ on UNIX compilers.
         if line.startswith(b'__attribute__ ((visibility ("default"))) '):
             line = line[len(b'__attribute__ ((visibility ("default"))) '):]
 
+        if line.startswith(b'__attribute__((deprecated('):
+            continue
+        elif b'__declspec(deprecated(' in line:
+            continue
+
         lines.append(line)
 
     return b'\n'.join(lines)
 
+
 ffi = cffi.FFI()
 ffi.set_source('_zstd_cffi', '''
+#include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
+#define ZDICT_STATIC_LINKING_ONLY
+#include "pool.h"
+#include "zdict.h"
 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
 
-ffi.cdef(normalize_output().decode('latin1'))
+DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
+
+sources = []
+
+for header in HEADERS:
+    preprocessed = preprocess(header)
+    sources.append(normalize_output(preprocessed))
+
+    # Do another pass over source and find constants that were preprocessed
+    # away.
+    with open(header, 'rb') as fh:
+        for line in fh:
+            line = line.strip()
+            m = DEFINE.match(line)
+            if not m:
+                continue
+
+            # The parser doesn't like some constants with complex values.
+            if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
+                continue
+
+            sources.append(m.group(0) + b' ...')
+
+ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources))
 
 if __name__ == '__main__':
     ffi.compile()
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/setup.py
--- a/contrib/python-zstandard/setup.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/setup.py	Mon Feb 13 09:44:16 2017 -0800
@@ -62,6 +62,7 @@
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
     ],
     keywords='zstandard zstd compression',
     ext_modules=extensions,
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/setup_zstd.py
--- a/contrib/python-zstandard/setup_zstd.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/setup_zstd.py	Mon Feb 13 09:44:16 2017 -0800
@@ -12,6 +12,8 @@
     'common/entropy_common.c',
     'common/error_private.c',
     'common/fse_decompress.c',
+    'common/pool.c',
+    'common/threading.c',
     'common/xxhash.c',
     'common/zstd_common.c',
     'compress/fse_compress.c',
@@ -19,11 +21,13 @@
     'compress/zstd_compress.c',
     'decompress/huf_decompress.c',
     'decompress/zstd_decompress.c',
+    'dictBuilder/cover.c',
     'dictBuilder/divsufsort.c',
     'dictBuilder/zdict.c',
 )]
 
 zstd_sources_legacy = ['zstd/%s' % p for p in (
+    'deprecated/zbuff_common.c',
     'deprecated/zbuff_compress.c',
     'deprecated/zbuff_decompress.c',
     'legacy/zstd_v01.c',
@@ -63,6 +67,7 @@
     'c-ext/decompressoriterator.c',
     'c-ext/decompressionwriter.c',
     'c-ext/dictparams.c',
+    'c-ext/frameparams.c',
 ]
 
 zstd_depends = [
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/common.py
--- a/contrib/python-zstandard/tests/common.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/common.py	Mon Feb 13 09:44:16 2017 -0800
@@ -1,4 +1,50 @@
+import inspect
 import io
+import types
+
+
+def make_cffi(cls):
+    """Decorator to add CFFI versions of each test method."""
+
+    try:
+        import zstd_cffi
+    except ImportError:
+        return cls
+
+    # If CFFI version is available, dynamically construct test methods
+    # that use it.
+
+    for attr in dir(cls):
+        fn = getattr(cls, attr)
+        if not inspect.ismethod(fn) and not inspect.isfunction(fn):
+            continue
+
+        if not fn.__name__.startswith('test_'):
+            continue
+
+        name = '%s_cffi' % fn.__name__
+
+        # Replace the "zstd" symbol with the CFFI module instance. Then copy
+        # the function object and install it in a new attribute.
+        if isinstance(fn, types.FunctionType):
+            globs = dict(fn.__globals__)
+            globs['zstd'] = zstd_cffi
+            new_fn = types.FunctionType(fn.__code__, globs, name,
+                                        fn.__defaults__, fn.__closure__)
+            new_method = new_fn
+        else:
+            globs = dict(fn.__func__.func_globals)
+            globs['zstd'] = zstd_cffi
+            new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
+                                        fn.__func__.func_defaults,
+                                        fn.__func__.func_closure)
+            new_method = types.UnboundMethodType(new_fn, fn.im_self,
+                                                 fn.im_class)
+
+        setattr(cls, name, new_method)
+
+    return cls
+
 
 class OpCountingBytesIO(io.BytesIO):
     def __init__(self, *args, **kwargs):
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_cffi.py
--- a/contrib/python-zstandard/tests/test_cffi.py	Fri Feb 10 18:20:58 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-import io
-
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
-
-import zstd
-
-try:
-    import zstd_cffi
-except ImportError:
-    raise unittest.SkipTest('cffi version of zstd not available')
-
-
-class TestCFFIWriteToToCDecompressor(unittest.TestCase):
-    def test_simple(self):
-        orig = io.BytesIO()
-        orig.write(b'foo')
-        orig.write(b'bar')
-        orig.write(b'foobar' * 16384)
-
-        dest = io.BytesIO()
-        cctx = zstd_cffi.ZstdCompressor()
-        with cctx.write_to(dest) as compressor:
-            compressor.write(orig.getvalue())
-
-        uncompressed = io.BytesIO()
-        dctx = zstd.ZstdDecompressor()
-        with dctx.write_to(uncompressed) as decompressor:
-            decompressor.write(dest.getvalue())
-
-        self.assertEqual(uncompressed.getvalue(), orig.getvalue())
-
-
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_compressor.py
--- a/contrib/python-zstandard/tests/test_compressor.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_compressor.py	Mon Feb 13 09:44:16 2017 -0800
@@ -10,7 +10,10 @@
 
 import zstd
 
-from .common import OpCountingBytesIO
+from .common import (
+    make_cffi,
+    OpCountingBytesIO,
+)
 
 
 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 @@
     next = lambda it: it.next()
 
 
+@make_cffi
 class TestCompressor(unittest.TestCase):
     def test_level_bounds(self):
         with self.assertRaises(ValueError):
@@ -28,18 +32,17 @@
             zstd.ZstdCompressor(level=23)
 
 
+@make_cffi
 class TestCompressor_compress(unittest.TestCase):
     def test_compress_empty(self):
         cctx = zstd.ZstdCompressor(level=1)
-        cctx.compress(b'')
-
-        cctx = zstd.ZstdCompressor(level=22)
-        cctx.compress(b'')
-
-    def test_compress_empty(self):
-        cctx = zstd.ZstdCompressor(level=1)
-        self.assertEqual(cctx.compress(b''),
-                         b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
+        result = cctx.compress(b'')
+        self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
+        params = zstd.get_frame_parameters(result)
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 524288)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum, 0)
 
         # TODO should be temporary until https://github.com/facebook/zstd/issues/506
         # is fixed.
@@ -59,6 +62,13 @@
         self.assertEqual(len(result), 999)
         self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
 
+        # This matches the test for read_from() below.
+        cctx = zstd.ZstdCompressor(level=1)
+        result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
+        self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
+                                 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
+                                 b'\x02\x09\x00\x00\x6f')
+
     def test_write_checksum(self):
         cctx = zstd.ZstdCompressor(level=1)
         no_checksum = cctx.compress(b'foobar')
@@ -67,6 +77,12 @@
 
         self.assertEqual(len(with_checksum), len(no_checksum) + 4)
 
+        no_params = zstd.get_frame_parameters(no_checksum)
+        with_params = zstd.get_frame_parameters(with_checksum)
+
+        self.assertFalse(no_params.has_checksum)
+        self.assertTrue(with_params.has_checksum)
+
     def test_write_content_size(self):
         cctx = zstd.ZstdCompressor(level=1)
         no_size = cctx.compress(b'foobar' * 256)
@@ -75,6 +91,11 @@
 
         self.assertEqual(len(with_size), len(no_size) + 1)
 
+        no_params = zstd.get_frame_parameters(no_size)
+        with_params = zstd.get_frame_parameters(with_size)
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 1536)
+
     def test_no_dict_id(self):
         samples = []
         for i in range(128):
@@ -92,6 +113,11 @@
 
         self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
 
+        no_params = zstd.get_frame_parameters(no_dict_id)
+        with_params = zstd.get_frame_parameters(with_dict_id)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 1584102229)
+
     def test_compress_dict_multiple(self):
         samples = []
         for i in range(128):
@@ -107,6 +133,7 @@
             cctx.compress(b'foo bar foobar foo bar foobar')
 
 
+@make_cffi
 class TestCompressor_compressobj(unittest.TestCase):
     def test_compressobj_empty(self):
         cctx = zstd.ZstdCompressor(level=1)
@@ -127,6 +154,12 @@
         self.assertEqual(len(result), 999)
         self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
 
+        params = zstd.get_frame_parameters(result)
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1048576)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
     def test_write_checksum(self):
         cctx = zstd.ZstdCompressor(level=1)
         cobj = cctx.compressobj()
@@ -135,6 +168,15 @@
         cobj = cctx.compressobj()
         with_checksum = cobj.compress(b'foobar') + cobj.flush()
 
+        no_params = zstd.get_frame_parameters(no_checksum)
+        with_params = zstd.get_frame_parameters(with_checksum)
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 0)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertTrue(with_params.has_checksum)
+
         self.assertEqual(len(with_checksum), len(no_checksum) + 4)
 
     def test_write_content_size(self):
@@ -145,6 +187,15 @@
         cobj = cctx.compressobj(size=len(b'foobar' * 256))
         with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
 
+        no_params = zstd.get_frame_parameters(no_size)
+        with_params = zstd.get_frame_parameters(with_size)
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 1536)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertFalse(with_params.has_checksum)
+
         self.assertEqual(len(with_size), len(no_size) + 1)
 
     def test_compress_after_finished(self):
@@ -187,6 +238,7 @@
         self.assertEqual(header, b'\x01\x00\x00')
 
 
+@make_cffi
 class TestCompressor_copy_stream(unittest.TestCase):
     def test_no_read(self):
         source = object()
@@ -229,6 +281,12 @@
         self.assertEqual(r, 255 * 16384)
         self.assertEqual(w, 999)
 
+        params = zstd.get_frame_parameters(dest.getvalue())
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1048576)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
     def test_write_checksum(self):
         source = io.BytesIO(b'foobar')
         no_checksum = io.BytesIO()
@@ -244,6 +302,15 @@
         self.assertEqual(len(with_checksum.getvalue()),
                          len(no_checksum.getvalue()) + 4)
 
+        no_params = zstd.get_frame_parameters(no_checksum.getvalue())
+        with_params = zstd.get_frame_parameters(with_checksum.getvalue())
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 0)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertTrue(with_params.has_checksum)
+
     def test_write_content_size(self):
         source = io.BytesIO(b'foobar' * 256)
         no_size = io.BytesIO()
@@ -268,6 +335,15 @@
         self.assertEqual(len(with_size.getvalue()),
                          len(no_size.getvalue()) + 1)
 
+        no_params = zstd.get_frame_parameters(no_size.getvalue())
+        with_params = zstd.get_frame_parameters(with_size.getvalue())
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 1536)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertFalse(with_params.has_checksum)
+
     def test_read_write_size(self):
         source = OpCountingBytesIO(b'foobarfoobar')
         dest = OpCountingBytesIO()
@@ -288,18 +364,25 @@
     return buffer.getvalue()
 
 
+@make_cffi
 class TestCompressor_write_to(unittest.TestCase):
     def test_empty(self):
-        self.assertEqual(compress(b'', 1),
-                         b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
+        result = compress(b'', 1)
+        self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
+
+        params = zstd.get_frame_parameters(result)
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 524288)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
 
     def test_multiple_compress(self):
         buffer = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=5)
         with cctx.write_to(buffer) as compressor:
-            compressor.write(b'foo')
-            compressor.write(b'bar')
-            compressor.write(b'x' * 8192)
+            self.assertEqual(compressor.write(b'foo'), 0)
+            self.assertEqual(compressor.write(b'bar'), 0)
+            self.assertEqual(compressor.write(b'x' * 8192), 0)
 
         result = buffer.getvalue()
         self.assertEqual(result,
@@ -318,11 +401,23 @@
         buffer = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=9, dict_data=d)
         with cctx.write_to(buffer) as compressor:
-            compressor.write(b'foo')
-            compressor.write(b'bar')
-            compressor.write(b'foo' * 16384)
+            self.assertEqual(compressor.write(b'foo'), 0)
+            self.assertEqual(compressor.write(b'bar'), 0)
+            self.assertEqual(compressor.write(b'foo' * 16384), 634)
 
         compressed = buffer.getvalue()
+
+        params = zstd.get_frame_parameters(compressed)
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1024)
+        self.assertEqual(params.dict_id, d.dict_id())
+        self.assertFalse(params.has_checksum)
+
+        self.assertEqual(compressed[0:32],
+                         b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
+                         b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
+                         b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
+
         h = hashlib.sha1(compressed).hexdigest()
         self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
 
@@ -332,11 +427,18 @@
         buffer = io.BytesIO()
         cctx = zstd.ZstdCompressor(compression_params=params)
         with cctx.write_to(buffer) as compressor:
-            compressor.write(b'foo')
-            compressor.write(b'bar')
-            compressor.write(b'foobar' * 16384)
+            self.assertEqual(compressor.write(b'foo'), 0)
+            self.assertEqual(compressor.write(b'bar'), 0)
+            self.assertEqual(compressor.write(b'foobar' * 16384), 0)
 
         compressed = buffer.getvalue()
+
+        params = zstd.get_frame_parameters(compressed)
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1048576)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
         h = hashlib.sha1(compressed).hexdigest()
         self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
 
@@ -344,12 +446,21 @@
         no_checksum = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=1)
         with cctx.write_to(no_checksum) as compressor:
-            compressor.write(b'foobar')
+            self.assertEqual(compressor.write(b'foobar'), 0)
 
         with_checksum = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
         with cctx.write_to(with_checksum) as compressor:
-            compressor.write(b'foobar')
+            self.assertEqual(compressor.write(b'foobar'), 0)
+
+        no_params = zstd.get_frame_parameters(no_checksum.getvalue())
+        with_params = zstd.get_frame_parameters(with_checksum.getvalue())
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 0)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertTrue(with_params.has_checksum)
 
         self.assertEqual(len(with_checksum.getvalue()),
                          len(no_checksum.getvalue()) + 4)
@@ -358,12 +469,12 @@
         no_size = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=1)
         with cctx.write_to(no_size) as compressor:
-            compressor.write(b'foobar' * 256)
+            self.assertEqual(compressor.write(b'foobar' * 256), 0)
 
         with_size = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
         with cctx.write_to(with_size) as compressor:
-            compressor.write(b'foobar' * 256)
+            self.assertEqual(compressor.write(b'foobar' * 256), 0)
 
         # Source size is not known in streaming mode, so header not
         # written.
@@ -373,7 +484,16 @@
         # Declaring size will write the header.
         with_size = io.BytesIO()
         with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
-            compressor.write(b'foobar' * 256)
+            self.assertEqual(compressor.write(b'foobar' * 256), 0)
+
+        no_params = zstd.get_frame_parameters(no_size.getvalue())
+        with_params = zstd.get_frame_parameters(with_size.getvalue())
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 1536)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, 0)
+        self.assertFalse(no_params.has_checksum)
+        self.assertFalse(with_params.has_checksum)
 
         self.assertEqual(len(with_size.getvalue()),
                          len(no_size.getvalue()) + 1)
@@ -390,12 +510,21 @@
         with_dict_id = io.BytesIO()
         cctx = zstd.ZstdCompressor(level=1, dict_data=d)
         with cctx.write_to(with_dict_id) as compressor:
-            compressor.write(b'foobarfoobar')
+            self.assertEqual(compressor.write(b'foobarfoobar'), 0)
 
         cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
         no_dict_id = io.BytesIO()
         with cctx.write_to(no_dict_id) as compressor:
-            compressor.write(b'foobarfoobar')
+            self.assertEqual(compressor.write(b'foobarfoobar'), 0)
+
+        no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
+        with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
+        self.assertEqual(no_params.content_size, 0)
+        self.assertEqual(with_params.content_size, 0)
+        self.assertEqual(no_params.dict_id, 0)
+        self.assertEqual(with_params.dict_id, d.dict_id())
+        self.assertFalse(no_params.has_checksum)
+        self.assertFalse(with_params.has_checksum)
 
         self.assertEqual(len(with_dict_id.getvalue()),
                          len(no_dict_id.getvalue()) + 4)
@@ -412,9 +541,9 @@
         cctx = zstd.ZstdCompressor(level=3)
         dest = OpCountingBytesIO()
         with cctx.write_to(dest, write_size=1) as compressor:
-            compressor.write(b'foo')
-            compressor.write(b'bar')
-            compressor.write(b'foobar')
+            self.assertEqual(compressor.write(b'foo'), 0)
+            self.assertEqual(compressor.write(b'bar'), 0)
+            self.assertEqual(compressor.write(b'foobar'), 0)
 
         self.assertEqual(len(dest.getvalue()), dest._write_count)
 
@@ -422,15 +551,15 @@
         cctx = zstd.ZstdCompressor(level=3)
         dest = OpCountingBytesIO()
         with cctx.write_to(dest) as compressor:
-            compressor.write(b'foo')
+            self.assertEqual(compressor.write(b'foo'), 0)
             self.assertEqual(dest._write_count, 0)
-            compressor.flush()
+            self.assertEqual(compressor.flush(), 12)
             self.assertEqual(dest._write_count, 1)
-            compressor.write(b'bar')
+            self.assertEqual(compressor.write(b'bar'), 0)
             self.assertEqual(dest._write_count, 1)
-            compressor.flush()
+            self.assertEqual(compressor.flush(), 6)
             self.assertEqual(dest._write_count, 2)
-            compressor.write(b'baz')
+            self.assertEqual(compressor.write(b'baz'), 0)
 
         self.assertEqual(dest._write_count, 3)
 
@@ -438,10 +567,10 @@
         cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
         dest = OpCountingBytesIO()
         with cctx.write_to(dest) as compressor:
-            compressor.write(b'foobar' * 8192)
+            self.assertEqual(compressor.write(b'foobar' * 8192), 0)
             count = dest._write_count
             offset = dest.tell()
-            compressor.flush()
+            self.assertEqual(compressor.flush(), 23)
             self.assertGreater(dest._write_count, count)
             self.assertGreater(dest.tell(), offset)
             offset = dest.tell()
@@ -456,18 +585,22 @@
         self.assertEqual(header, b'\x01\x00\x00')
 
 
+@make_cffi
 class TestCompressor_read_from(unittest.TestCase):
     def test_type_validation(self):
         cctx = zstd.ZstdCompressor()
 
         # Object with read() works.
-        cctx.read_from(io.BytesIO())
+        for chunk in cctx.read_from(io.BytesIO()):
+            pass
 
         # Buffer protocol works.
-        cctx.read_from(b'foobar')
+        for chunk in cctx.read_from(b'foobar'):
+            pass
 
         with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
-            cctx.read_from(True)
+            for chunk in cctx.read_from(True):
+                pass
 
     def test_read_empty(self):
         cctx = zstd.ZstdCompressor(level=1)
@@ -521,6 +654,12 @@
         # We should get the same output as the one-shot compression mechanism.
         self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
 
+        params = zstd.get_frame_parameters(b''.join(chunks))
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 262144)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
         # Now check the buffer protocol.
         it = cctx.read_from(source.getvalue())
         chunks = list(it)
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_data_structures.py
--- a/contrib/python-zstandard/tests/test_data_structures.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_data_structures.py	Mon Feb 13 09:44:16 2017 -0800
@@ -13,6 +13,12 @@
 
 import zstd
 
+from . common import (
+    make_cffi,
+)
+
+
+@make_cffi
 class TestCompressionParameters(unittest.TestCase):
     def test_init_bad_arg_type(self):
         with self.assertRaises(TypeError):
@@ -42,7 +48,81 @@
         p = zstd.get_compression_parameters(1)
         self.assertIsInstance(p, zstd.CompressionParameters)
 
-        self.assertEqual(p[0], 19)
+        self.assertEqual(p.window_log, 19)
+
+    def test_members(self):
+        p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
+        self.assertEqual(p.window_log, 10)
+        self.assertEqual(p.chain_log, 6)
+        self.assertEqual(p.hash_log, 7)
+        self.assertEqual(p.search_log, 4)
+        self.assertEqual(p.search_length, 5)
+        self.assertEqual(p.target_length, 8)
+        self.assertEqual(p.strategy, 1)
+
+
+@make_cffi
+class TestFrameParameters(unittest.TestCase):
+    def test_invalid_type(self):
+        with self.assertRaises(TypeError):
+            zstd.get_frame_parameters(None)
+
+        with self.assertRaises(TypeError):
+            zstd.get_frame_parameters(u'foobarbaz')
+
+    def test_invalid_input_sizes(self):
+        with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
+            zstd.get_frame_parameters(b'')
+
+        with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
+            zstd.get_frame_parameters(zstd.FRAME_HEADER)
+
+    def test_invalid_frame(self):
+        with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
+            zstd.get_frame_parameters(b'foobarbaz')
+
+    def test_attributes(self):
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1024)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
+        # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1024)
+        self.assertEqual(params.dict_id, 255)
+        self.assertFalse(params.has_checksum)
+
+        # Lowest 3rd bit indicates if checksum is present.
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 1024)
+        self.assertEqual(params.dict_id, 0)
+        self.assertTrue(params.has_checksum)
+
+        # Upper 2 bits indicate content size.
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
+        self.assertEqual(params.content_size, 511)
+        self.assertEqual(params.window_size, 1024)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
+        # Window descriptor is 2nd byte after frame header.
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.window_size, 262144)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
+        # Set multiple things.
+        params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
+        self.assertEqual(params.content_size, 272)
+        self.assertEqual(params.window_size, 262144)
+        self.assertEqual(params.dict_id, 15)
+        self.assertTrue(params.has_checksum)
+
 
 if hypothesis:
     s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
@@ -65,6 +145,8 @@
                                           zstd.STRATEGY_BTLAZY2,
                                           zstd.STRATEGY_BTOPT))
 
+
+    @make_cffi
     class TestCompressionParametersHypothesis(unittest.TestCase):
         @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
                           s_searchlength, s_targetlength, s_strategy)
@@ -73,9 +155,6 @@
             p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
                                            searchlog, searchlength,
                                            targetlength, strategy)
-            self.assertEqual(tuple(p),
-                             (windowlog, chainlog, hashlog, searchlog,
-                              searchlength, targetlength, strategy))
 
             # Verify we can instantiate a compressor with the supplied values.
             # ZSTD_checkCParams moves the goal posts on us from what's advertised
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_decompressor.py
--- a/contrib/python-zstandard/tests/test_decompressor.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_decompressor.py	Mon Feb 13 09:44:16 2017 -0800
@@ -10,7 +10,10 @@
 
 import zstd
 
-from .common import OpCountingBytesIO
+from .common import (
+    make_cffi,
+    OpCountingBytesIO,
+)
 
 
 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 @@
     next = lambda it: it.next()
 
 
+@make_cffi
 class TestDecompressor_decompress(unittest.TestCase):
     def test_empty_input(self):
         dctx = zstd.ZstdDecompressor()
@@ -119,6 +123,7 @@
             self.assertEqual(decompressed, sources[i])
 
 
+@make_cffi
 class TestDecompressor_copy_stream(unittest.TestCase):
     def test_no_read(self):
         source = object()
@@ -180,6 +185,7 @@
         self.assertEqual(dest._write_count, len(dest.getvalue()))
 
 
+@make_cffi
 class TestDecompressor_decompressobj(unittest.TestCase):
     def test_simple(self):
         data = zstd.ZstdCompressor(level=1).compress(b'foobar')
@@ -207,6 +213,7 @@
     return buffer.getvalue()
 
 
+@make_cffi
 class TestDecompressor_write_to(unittest.TestCase):
     def test_empty_roundtrip(self):
         cctx = zstd.ZstdCompressor()
@@ -256,14 +263,14 @@
         buffer = io.BytesIO()
         cctx = zstd.ZstdCompressor(dict_data=d)
         with cctx.write_to(buffer) as compressor:
-            compressor.write(orig)
+            self.assertEqual(compressor.write(orig), 1544)
 
         compressed = buffer.getvalue()
         buffer = io.BytesIO()
 
         dctx = zstd.ZstdDecompressor(dict_data=d)
         with dctx.write_to(buffer) as decompressor:
-            decompressor.write(compressed)
+            self.assertEqual(decompressor.write(compressed), len(orig))
 
         self.assertEqual(buffer.getvalue(), orig)
 
@@ -291,6 +298,7 @@
         self.assertEqual(dest._write_count, len(dest.getvalue()))
 
 
+@make_cffi
 class TestDecompressor_read_from(unittest.TestCase):
     def test_type_validation(self):
         dctx = zstd.ZstdDecompressor()
@@ -302,7 +310,7 @@
         dctx.read_from(b'foobar')
 
         with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
-            dctx.read_from(True)
+            b''.join(dctx.read_from(True))
 
     def test_empty_input(self):
         dctx = zstd.ZstdDecompressor()
@@ -351,7 +359,7 @@
         dctx = zstd.ZstdDecompressor()
 
         with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
-            dctx.read_from(b'', skip_bytes=1, read_size=1)
+            b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
 
         with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
             b''.join(dctx.read_from(b'foobar', skip_bytes=10))
@@ -476,3 +484,94 @@
             self.assertEqual(len(chunk), 1)
 
         self.assertEqual(source._read_count, len(source.getvalue()))
+
+
+@make_cffi
+class TestDecompressor_content_dict_chain(unittest.TestCase):
+    def test_bad_inputs_simple(self):
+        dctx = zstd.ZstdDecompressor()
+
+        with self.assertRaises(TypeError):
+            dctx.decompress_content_dict_chain(b'foo')
+
+        with self.assertRaises(TypeError):
+            dctx.decompress_content_dict_chain((b'foo', b'bar'))
+
+        with self.assertRaisesRegexp(ValueError, 'empty input chain'):
+            dctx.decompress_content_dict_chain([])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
+            dctx.decompress_content_dict_chain([u'foo'])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
+            dctx.decompress_content_dict_chain([True])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
+            dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
+            dctx.decompress_content_dict_chain([b'foo' * 8])
+
+        no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
+            dctx.decompress_content_dict_chain([no_size])
+
+        # Corrupt first frame.
+        frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
+        frame = frame[0:12] + frame[15:]
+        with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
+            dctx.decompress_content_dict_chain([frame])
+
+    def test_bad_subsequent_input(self):
+        initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
+
+        dctx = zstd.ZstdDecompressor()
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
+            dctx.decompress_content_dict_chain([initial, u'foo'])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
+            dctx.decompress_content_dict_chain([initial, None])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
+            dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
+            dctx.decompress_content_dict_chain([initial, b'foo' * 8])
+
+        no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
+
+        with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
+            dctx.decompress_content_dict_chain([initial, no_size])
+
+        # Corrupt second frame.
+        cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
+        frame = cctx.compress(b'bar' * 64)
+        frame = frame[0:12] + frame[15:]
+
+        with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
+            dctx.decompress_content_dict_chain([initial, frame])
+
+    def test_simple(self):
+        original = [
+            b'foo' * 64,
+            b'foobar' * 64,
+            b'baz' * 64,
+            b'foobaz' * 64,
+            b'foobarbaz' * 64,
+        ]
+
+        chunks = []
+        chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
+        for i, chunk in enumerate(original[1:]):
+            d = zstd.ZstdCompressionDict(original[i])
+            cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
+            chunks.append(cctx.compress(chunk))
+
+        for i in range(1, len(original)):
+            chain = chunks[0:i]
+            expected = original[i - 1]
+            dctx = zstd.ZstdDecompressor()
+            decompressed = dctx.decompress_content_dict_chain(chain)
+            self.assertEqual(decompressed, expected)
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_estimate_sizes.py
--- a/contrib/python-zstandard/tests/test_estimate_sizes.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_estimate_sizes.py	Mon Feb 13 09:44:16 2017 -0800
@@ -5,7 +5,12 @@
 
 import zstd
 
+from . common import (
+    make_cffi,
+)
 
+
+@make_cffi
 class TestSizes(unittest.TestCase):
     def test_decompression_size(self):
         size = zstd.estimate_decompression_context_size()
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_module_attributes.py
--- a/contrib/python-zstandard/tests/test_module_attributes.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_module_attributes.py	Mon Feb 13 09:44:16 2017 -0800
@@ -7,9 +7,15 @@
 
 import zstd
 
+from . common import (
+    make_cffi,
+)
+
+
+@make_cffi
 class TestModuleAttributes(unittest.TestCase):
     def test_version(self):
-        self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
+        self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
 
     def test_constants(self):
         self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -45,4 +51,4 @@
         )
 
         for a in attrs:
-            self.assertTrue(hasattr(zstd, a))
+            self.assertTrue(hasattr(zstd, a), a)
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_roundtrip.py
--- a/contrib/python-zstandard/tests/test_roundtrip.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_roundtrip.py	Mon Feb 13 09:44:16 2017 -0800
@@ -13,10 +13,14 @@
 
 import zstd
 
+from .common import (
+    make_cffi,
+)
 
 compression_levels = strategies.integers(min_value=1, max_value=22)
 
 
+@make_cffi
 class TestRoundTrip(unittest.TestCase):
     @hypothesis.given(strategies.binary(), compression_levels)
     def test_compress_write_to(self, data, level):
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/tests/test_train_dictionary.py
--- a/contrib/python-zstandard/tests/test_train_dictionary.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/tests/test_train_dictionary.py	Mon Feb 13 09:44:16 2017 -0800
@@ -7,6 +7,9 @@
 
 import zstd
 
+from . common import (
+    make_cffi,
+)
 
 if sys.version_info[0] >= 3:
     int_type = int
@@ -14,6 +17,7 @@
     int_type = long
 
 
+@make_cffi
 class TestTrainDictionary(unittest.TestCase):
     def test_no_args(self):
         with self.assertRaises(TypeError):
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd.c
--- a/contrib/python-zstandard/zstd.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd.c	Mon Feb 13 09:44:16 2017 -0800
@@ -34,6 +34,11 @@
 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
 "optional input size and dictionary size");
 
+PyDoc_STRVAR(get_frame_parameters__doc__,
+"get_frame_parameters(data)\n"
+"\n"
+"Obtains a ``FrameParameters`` instance by parsing data.\n");
+
 PyDoc_STRVAR(train_dictionary__doc__,
 "train_dictionary(dict_size, samples)\n"
 "\n"
@@ -53,6 +58,8 @@
 	METH_NOARGS, estimate_decompression_context_size__doc__ },
 	{ "get_compression_parameters", (PyCFunction)get_compression_parameters,
 	METH_VARARGS, get_compression_parameters__doc__ },
+	{ "get_frame_parameters", (PyCFunction)get_frame_parameters,
+	METH_VARARGS, get_frame_parameters__doc__ },
 	{ "train_dictionary", (PyCFunction)train_dictionary,
 	METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
 	{ NULL, NULL }
@@ -70,6 +77,7 @@
 void decompressobj_module_init(PyObject* mod);
 void decompressionwriter_module_init(PyObject* mod);
 void decompressoriterator_module_init(PyObject* mod);
+void frameparams_module_init(PyObject* mod);
 
 void zstd_module_init(PyObject* m) {
 	/* python-zstandard relies on unstable zstd C API features. This means
@@ -87,7 +95,7 @@
 	   We detect this mismatch here and refuse to load the module if this
 	   scenario is detected.
 	*/
-	if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
+	if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
 		PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
 		return;
 	}
@@ -104,6 +112,7 @@
 	decompressobj_module_init(m);
 	decompressionwriter_module_init(m);
 	decompressoriterator_module_init(m);
+	frameparams_module_init(m);
 }
 
 #if PY_MAJOR_VERSION >= 3
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/mem.h
--- a/contrib/python-zstandard/zstd/common/mem.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/common/mem.h	Mon Feb 13 09:44:16 2017 -0800
@@ -39,7 +39,7 @@
 #endif
 
 /* code only tested on 32 and 64 bits systems */
-#define MEM_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/pool.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/common/pool.c	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,194 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>  /* size_t */
+#include <stdlib.h>  /* malloc, calloc, free */
+#include "pool.h"
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifdef ZSTD_MULTITHREAD
+
+#include "threading.h"   /* pthread adaptation */
+
+/* A job is a function and an opaque argument */
+typedef struct POOL_job_s {
+  POOL_function function;
+  void *opaque;
+} POOL_job;
+
+struct POOL_ctx_s {
+    /* Keep track of the threads */
+    pthread_t *threads;
+    size_t numThreads;
+
+    /* The queue is a circular buffer */
+    POOL_job *queue;
+    size_t queueHead;
+    size_t queueTail;
+    size_t queueSize;
+    /* The mutex protects the queue */
+    pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    pthread_cond_t queuePushCond;
+    /* Condition variables for poppers to wait on when the queue is empty */
+    pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* POOL_thread() :
+   Work thread for the thread pool.
+   Waits for jobs and executes them.
+   @returns : NULL on failure else non-null.
+*/
+static void* POOL_thread(void* opaque) {
+    POOL_ctx* const ctx = (POOL_ctx*)opaque;
+    if (!ctx) { return NULL; }
+    for (;;) {
+        /* Lock the mutex and wait for a non-empty queue or until shutdown */
+        pthread_mutex_lock(&ctx->queueMutex);
+        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
+            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        }
+        /* empty => shutting down: so stop */
+        if (ctx->queueHead == ctx->queueTail) {
+            pthread_mutex_unlock(&ctx->queueMutex);
+            return opaque;
+        }
+        /* Pop a job off the queue */
+        {   POOL_job const job = ctx->queue[ctx->queueHead];
+            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            /* Unlock the mutex, signal a pusher, and run the job */
+            pthread_mutex_unlock(&ctx->queueMutex);
+            pthread_cond_signal(&ctx->queuePushCond);
+            job.function(job.opaque);
+        }
+    }
+    /* Unreachable */
+}
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+    POOL_ctx *ctx;
+    /* Check the parameters */
+    if (!numThreads || !queueSize) { return NULL; }
+    /* Allocate the context and zero initialize */
+    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
+    if (!ctx) { return NULL; }
+    /* Initialize the job queue.
+     * It needs one extra space since one space is wasted to differentiate empty
+     * and full queues.
+     */
+    ctx->queueSize = queueSize + 1;
+    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queueHead = 0;
+    ctx->queueTail = 0;
+    pthread_mutex_init(&ctx->queueMutex, NULL);
+    pthread_cond_init(&ctx->queuePushCond, NULL);
+    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->shutdown = 0;
+    /* Allocate space for the thread handles */
+    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->numThreads = 0;
+    /* Check for errors */
+    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
+    /* Initialize the threads */
+    {   size_t i;
+        for (i = 0; i < numThreads; ++i) {
+            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+                ctx->numThreads = i;
+                POOL_free(ctx);
+                return NULL;
+        }   }
+        ctx->numThreads = numThreads;
+    }
+    return ctx;
+}
+
+/*! POOL_join() :
+    Shutdown the queue, wake any sleeping threads, and join all of the threads.
+*/
+static void POOL_join(POOL_ctx *ctx) {
+    /* Shut down the queue */
+    pthread_mutex_lock(&ctx->queueMutex);
+    ctx->shutdown = 1;
+    pthread_mutex_unlock(&ctx->queueMutex);
+    /* Wake up sleeping threads */
+    pthread_cond_broadcast(&ctx->queuePushCond);
+    pthread_cond_broadcast(&ctx->queuePopCond);
+    /* Join all of the threads */
+    {   size_t i;
+        for (i = 0; i < ctx->numThreads; ++i) {
+            pthread_join(ctx->threads[i], NULL);
+    }   }
+}
+
+void POOL_free(POOL_ctx *ctx) {
+    if (!ctx) { return; }
+    POOL_join(ctx);
+    pthread_mutex_destroy(&ctx->queueMutex);
+    pthread_cond_destroy(&ctx->queuePushCond);
+    pthread_cond_destroy(&ctx->queuePopCond);
+    if (ctx->queue) free(ctx->queue);
+    if (ctx->threads) free(ctx->threads);
+    free(ctx);
+}
+
+void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
+    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
+    if (!ctx) { return; }
+
+    pthread_mutex_lock(&ctx->queueMutex);
+    {   POOL_job const job = {function, opaque};
+        /* Wait until there is space in the queue for the new job */
+        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        while (ctx->queueHead == newTail && !ctx->shutdown) {
+          pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+          newTail = (ctx->queueTail + 1) % ctx->queueSize;
+        }
+        /* The queue is still going => there is space */
+        if (!ctx->shutdown) {
+            ctx->queue[ctx->queueTail] = job;
+            ctx->queueTail = newTail;
+        }
+    }
+    pthread_mutex_unlock(&ctx->queueMutex);
+    pthread_cond_signal(&ctx->queuePopCond);
+}
+
+#else  /* ZSTD_MULTITHREAD  not defined */
+/* No multi-threading support */
+
+/* We don't need any data, but if it is empty malloc() might return NULL. */
+struct POOL_ctx_s {
+  int data;
+};
+
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+  (void)numThreads;
+  (void)queueSize;
+  return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+}
+
+void POOL_free(POOL_ctx *ctx) {
+  if (ctx) free(ctx);
+}
+
+void POOL_add(void *ctx, POOL_function function, void *opaque) {
+  (void)ctx;
+  function(opaque);
+}
+
+#endif  /* ZSTD_MULTITHREAD */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/pool.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/common/pool.h	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+#ifndef POOL_H
+#define POOL_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#include <stddef.h>   /* size_t */
+
+typedef struct POOL_ctx_s POOL_ctx;
+
+/*! POOL_create() :
+    Create a thread pool with at most `numThreads` threads.
+    `numThreads` must be at least 1.
+    The maximum number of queued jobs before blocking is `queueSize`.
+    `queueSize` must be at least 1.
+    @return : The POOL_ctx pointer on success else NULL.
+*/
+POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+
+/*! POOL_free() :
+    Free a thread pool returned by POOL_create().
+*/
+void POOL_free(POOL_ctx *ctx);
+
+/*! POOL_function :
+    The function type that can be added to a thread pool.
+*/
+typedef void (*POOL_function)(void *);
+/*! POOL_add_function :
+    The function type for a generic thread pool add function.
+*/
+typedef void (*POOL_add_function)(void *, POOL_function, void *);
+
+/*! POOL_add() :
+    Add the job `function(opaque)` to the thread pool.
+    Possibly blocks until there is room in the queue.
+    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
+*/
+void POOL_add(void *ctx, POOL_function function, void *opaque);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/threading.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/common/threading.c	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,79 @@
+
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+/**
+ * This file will hold wrapper for systems, which do not support pthreads
+ */
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4206)        /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
+#endif
+
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+
+
+/* ===  Dependencies  === */
+#include <process.h>
+#include <errno.h>
+#include "threading.h"
+
+
+/* ===  Implementation  === */
+
+static unsigned __stdcall worker(void *arg)
+{
+    pthread_t* const thread = (pthread_t*) arg;
+    thread->arg = thread->start_routine(thread->arg);
+    return 0;
+}
+
+int pthread_create(pthread_t* thread, const void* unused,
+            void* (*start_routine) (void*), void* arg)
+{
+    (void)unused;
+    thread->arg = arg;
+    thread->start_routine = start_routine;
+    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
+
+    if (!thread->handle)
+        return errno;
+    else
+        return 0;
+}
+
+int _pthread_join(pthread_t * thread, void **value_ptr)
+{
+    DWORD result;
+
+    if (!thread->handle) return 0;
+
+    result = WaitForSingleObject(thread->handle, INFINITE);
+    switch (result) {
+    case WAIT_OBJECT_0:
+        if (value_ptr) *value_ptr = thread->arg;
+        return 0;
+    case WAIT_ABANDONED:
+        return EINVAL;
+    default:
+        return GetLastError();
+    }
+}
+
+#endif   /* ZSTD_MULTITHREAD */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/threading.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/common/threading.h	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,104 @@
+
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+#ifdef WINVER
+#  undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+#  undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+/* mutex */
+#define pthread_mutex_t           CRITICAL_SECTION
+#define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
+#define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
+#define pthread_mutex_lock(a)     EnterCriticalSection((a))
+#define pthread_mutex_unlock(a)   LeaveCriticalSection((a))
+
+/* condition variable */
+#define pthread_cond_t             CONDITION_VARIABLE
+#define pthread_cond_init(a, b)    InitializeConditionVariable((a))
+#define pthread_cond_destroy(a)    /* No delete */
+#define pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define pthread_cond_signal(a)     WakeConditionVariable((a))
+#define pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* pthread_create() and pthread_join() */
+typedef struct {
+    HANDLE handle;
+    void* (*start_routine)(void*);
+    void* arg;
+} pthread_t;
+
+int pthread_create(pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+#define pthread_join(a, b) _pthread_join(&(a), (b))
+int _pthread_join(pthread_t* thread, void** value_ptr);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection mathod */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#else  /* ZSTD_MULTITHREAD not defined */
+/* No multithreading support */
+
+#define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
+#define pthread_mutex_init(a,b)
+#define pthread_mutex_destroy(a)
+#define pthread_mutex_lock(a)
+#define pthread_mutex_unlock(a)
+
+#define pthread_cond_t int
+#define pthread_cond_init(a,b)
+#define pthread_cond_destroy(a)
+#define pthread_cond_wait(a,b)
+#define pthread_cond_signal(a)
+#define pthread_cond_broadcast(a)
+
+/* do not use pthread_t */
+
+#endif /* ZSTD_MULTITHREAD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* THREADING_H_938743 */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/zstd_common.c
--- a/contrib/python-zstandard/zstd/common/zstd_common.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/common/zstd_common.c	Mon Feb 13 09:44:16 2017 -0800
@@ -43,10 +43,6 @@
 *   provides error code string from enum */
 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
 
-/* ---   ZBUFF Error Management  (deprecated)   --- */
-unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
-const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
 
 /*=**************************************************************
 *  Custom allocator
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/zstd_errors.h
--- a/contrib/python-zstandard/zstd/common/zstd_errors.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/common/zstd_errors.h	Mon Feb 13 09:44:16 2017 -0800
@@ -18,6 +18,20 @@
 #include <stddef.h>   /* size_t */
 
 
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#  define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#else
+#  define ZSTDERRORLIB_VISIBILITY
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#endif
+
 /*-****************************************
 *  error codes list
 ******************************************/
@@ -49,8 +63,8 @@
 /*! ZSTD_getErrorCode() :
     convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
     which can be used to compare directly with enum list published into "error_public.h" */
-ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
 
 
 #if defined (__cplusplus)
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/common/zstd_internal.h
--- a/contrib/python-zstandard/zstd/common/zstd_internal.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/common/zstd_internal.h	Mon Feb 13 09:44:16 2017 -0800
@@ -267,4 +267,13 @@
 }
 
 
+/* hidden functions */
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
+
+
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/compress/zstd_compress.c
--- a/contrib/python-zstandard/zstd/compress/zstd_compress.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/compress/zstd_compress.c	Mon Feb 13 09:44:16 2017 -0800
@@ -51,8 +51,7 @@
 /*-*************************************
 *  Context memory management
 ***************************************/
-struct ZSTD_CCtx_s
-{
+struct ZSTD_CCtx_s {
     const BYTE* nextSrc;    /* next block here to continue on current prefix */
     const BYTE* base;       /* All regular indexes relative to this position */
     const BYTE* dictBase;   /* extDict indexes relative to this position */
@@ -61,10 +60,11 @@
     U32   nextToUpdate;     /* index from which to continue dictionary update */
     U32   nextToUpdate3;    /* index from which to continue dictionary update */
     U32   hashLog3;         /* dispatch table : larger == faster, more memory */
-    U32   loadedDictEnd;
+    U32   loadedDictEnd;    /* index of end of dictionary */
+    U32   forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
     ZSTD_compressionStage_e stage;
     U32   rep[ZSTD_REP_NUM];
-    U32   savedRep[ZSTD_REP_NUM];
+    U32   repToConfirm[ZSTD_REP_NUM];
     U32   dictID;
     ZSTD_parameters params;
     void* workSpace;
@@ -101,7 +101,7 @@
     cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
     if (!cctx) return NULL;
     memset(cctx, 0, sizeof(ZSTD_CCtx));
-    memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
+    cctx->customMem = customMem;
     return cctx;
 }
 
@@ -119,6 +119,15 @@
     return sizeof(*cctx) + cctx->workSpaceSize;
 }
 
+size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
+{
+    switch(param)
+    {
+    case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
+    default: return ERROR(parameter_unknown);
+    }
+}
+
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
 {
     return &(ctx->seqStore);
@@ -318,6 +327,14 @@
     }
 }
 
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
+}
 
 /*! ZSTD_copyCCtx() :
 *   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -735,12 +752,19 @@
       if ((size_t)(op-ostart) >= maxCSize) return 0; }
 
     /* confirm repcodes */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
 
     return op - ostart;
 }
 
 
+#if 0 /* for debug */
+#  define STORESEQ_DEBUG
+#include <stdio.h>   /* fprintf */
+U32 g_startDebug = 0;
+const BYTE* g_start = NULL;
+#endif
+
 /*! ZSTD_storeSeq() :
     Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
     `offsetCode` : distance to match, or 0 == repCode.
@@ -748,13 +772,14 @@
 */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
-#if 0  /* for debug */
-    static const BYTE* g_start = NULL;
-    const U32 pos = (U32)((const BYTE*)literals - g_start);
-    if (g_start==NULL) g_start = (const BYTE*)literals;
-    //if ((pos > 1) && (pos < 50000))
-        printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
-               pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
+#ifdef STORESEQ_DEBUG
+    if (g_startDebug) {
+        const U32 pos = (U32)((const BYTE*)literals - g_start);
+        if (g_start==NULL) g_start = (const BYTE*)literals;
+        if ((pos > 1895000) && (pos < 1895300))
+            fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+                   pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
+    }
 #endif
     /* copy Literals */
     ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
@@ -1004,8 +1029,8 @@
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1119,7 +1144,7 @@
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1273,8 +1298,8 @@
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1423,7 +1448,7 @@
     }   }   }
 
     /* save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1955,8 +1980,8 @@
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
-    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
+    ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+    ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2150,7 +2175,7 @@
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2409,12 +2434,14 @@
 
     cctx->nextSrc = ip + srcSize;
 
-    {   size_t const cSize = frame ?
+    if (srcSize) {
+        size_t const cSize = frame ?
                              ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                              ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
         if (ZSTD_isError(cSize)) return cSize;
         return cSize + fhSize;
-    }
+    } else
+        return fhSize;
 }
 
 
@@ -2450,7 +2477,7 @@
     zc->dictBase = zc->base;
     zc->base += ip - zc->nextSrc;
     zc->nextToUpdate = zc->dictLimit;
-    zc->loadedDictEnd = (U32)(iend - zc->base);
+    zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
 
     zc->nextSrc = iend;
     if (srcSize <= HASH_READ_SIZE) return 0;
@@ -2557,9 +2584,9 @@
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
     {   U32 offcodeMax = MaxOff;
@@ -2594,7 +2621,6 @@
     }
 }
 
-
 /*! ZSTD_compressBegin_internal() :
 *   @return : 0, or an error code */
 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2626,9 +2652,9 @@
 }
 
 
-size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
 {
-    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
 }
 
 
@@ -2733,7 +2759,8 @@
 /* =====  Dictionary API  ===== */
 
 struct ZSTD_CDict_s {
-    void* dictContent;
+    void* dictBuffer;
+    const void* dictContent;
     size_t dictContentSize;
     ZSTD_CCtx* refContext;
 };  /* typedef'd tp ZSTD_CDict within "zstd.h" */
@@ -2741,39 +2768,45 @@
 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
 {
     if (cdict==NULL) return 0;   /* support sizeof on NULL */
-    return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
+    return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
 }
 
-ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
+                                      ZSTD_parameters params, ZSTD_customMem customMem)
 {
     if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
     if (!customMem.customAlloc || !customMem.customFree) return NULL;
 
     {   ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
         ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
 
-        if (!dictContent || !cdict || !cctx) {
-            ZSTD_free(dictContent, customMem);
+        if (!cdict || !cctx) {
             ZSTD_free(cdict, customMem);
             ZSTD_free(cctx, customMem);
             return NULL;
         }
 
-        if (dictSize) {
-            memcpy(dictContent, dict, dictSize);
+        if ((byReference) || (!dictBuffer) || (!dictSize)) {
+            cdict->dictBuffer = NULL;
+            cdict->dictContent = dictBuffer;
+        } else {
+            void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
+            if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
+            memcpy(internalBuffer, dictBuffer, dictSize);
+            cdict->dictBuffer = internalBuffer;
+            cdict->dictContent = internalBuffer;
         }
-        {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
+
+        {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
             if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
+                ZSTD_free(cdict->dictBuffer, customMem);
+                ZSTD_free(cctx, customMem);
                 ZSTD_free(cdict, customMem);
-                ZSTD_free(cctx, customMem);
                 return NULL;
         }   }
 
-        cdict->dictContent = dictContent;
+        cdict->refContext = cctx;
         cdict->dictContentSize = dictSize;
-        cdict->refContext = cctx;
         return cdict;
     }
 }
@@ -2783,7 +2816,15 @@
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
     ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
     params.fParams.contentSizeFlag = 1;
-    return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
+    return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
+    params.fParams.contentSizeFlag = 1;
+    return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
 }
 
 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2791,7 +2832,7 @@
     if (cdict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = cdict->refContext->customMem;
         ZSTD_freeCCtx(cdict->refContext);
-        ZSTD_free(cdict->dictContent, cMem);
+        ZSTD_free(cdict->dictBuffer, cMem);
         ZSTD_free(cdict, cMem);
         return 0;
     }
@@ -2801,7 +2842,7 @@
     return ZSTD_getParamsFromCCtx(cdict->refContext);
 }
 
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
 {
     if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
     else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
@@ -2900,7 +2941,7 @@
 
 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
 {
-    if (zcs->inBuffSize==0) return ERROR(stage_wrong);   /* zcs has not been init at least once */
+    if (zcs->inBuffSize==0) return ERROR(stage_wrong);   /* zcs has not been init at least once => can't reset */
 
     if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
     else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
@@ -2937,9 +2978,9 @@
         if (zcs->outBuff == NULL) return ERROR(memory_allocation);
     }
 
-    if (dict) {
+    if (dict && dictSize >= 8) {
         ZSTD_freeCDict(zcs->cdictLocal);
-        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
         if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
         zcs->cdict = zcs->cdictLocal;
     } else zcs->cdict = NULL;
@@ -2956,6 +2997,7 @@
     ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
     size_t const initError =  ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
     zcs->cdict = cdict;
+    zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
     return initError;
 }
 
@@ -2967,7 +3009,8 @@
 
 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
 {
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
+    if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
     return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
 }
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/compress/zstd_opt.h
--- a/contrib/python-zstandard/zstd/compress/zstd_opt.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/compress/zstd_opt.h	Mon Feb 13 09:44:16 2017 -0800
@@ -38,7 +38,7 @@
 
     ssPtr->cachedLiterals = NULL;
     ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-    ssPtr->staticPrices = 0; 
+    ssPtr->staticPrices = 0;
 
     if (ssPtr->litLengthSum == 0) {
         if (srcSize <= 1024) ssPtr->staticPrices = 1;
@@ -56,7 +56,7 @@
 
         for (u=0; u<=MaxLit; u++) {
             ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->litSum += ssPtr->litFreq[u]; 
+            ssPtr->litSum += ssPtr->litFreq[u];
         }
         for (u=0; u<=MaxLL; u++)
             ssPtr->litLengthFreq[u] = 1;
@@ -634,7 +634,7 @@
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -825,7 +825,7 @@
 
             match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
 
-            if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
+            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                 best_mlen = matches[match_num-1].len;
                 best_off = matches[match_num-1].off;
                 last_pos = cur + 1;
@@ -835,7 +835,7 @@
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
-                best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
+                best_mlen = matches[u].len;
 
                 while (mlen <= best_mlen) {
                     if (opt[cur].mlen == 1) {
@@ -907,7 +907,7 @@
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t lastLLSize = iend - anchor;
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/compress/zstdmt_compress.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/compress/zstdmt_compress.c	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,740 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+
+/* ======   Tuning parameters   ====== */
+#define ZSTDMT_NBTHREADS_MAX 128
+
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+/* ======   Dependencies   ====== */
+#include <stdlib.h>   /* malloc */
+#include <string.h>   /* memcpy */
+#include "pool.h"     /* threadpool */
+#include "threading.h"  /* mutex */
+#include "zstd_internal.h"   /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstdmt_compress.h"
+#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
+#include "xxhash.h"
+
+
+/* ======   Debug   ====== */
+#if 0
+
+#  include <stdio.h>
+#  include <unistd.h>
+#  include <sys/times.h>
+   static unsigned g_debugLevel = 3;
+#  define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
+#  define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
+
+#  define DEBUG_PRINTHEX(l,p,n) { \
+    unsigned debug_u;                   \
+    for (debug_u=0; debug_u<(n); debug_u++)           \
+        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+    DEBUGLOGRAW(l, " \n");       \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds()
+{
+   static clock_t _ticksPerSecond = 0;
+   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+   return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+}
+
+#define MUTEX_WAIT_TIME_DLEVEL 5
+#define PTHREAD_MUTEX_LOCK(mutex) \
+if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
+   unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
+   pthread_mutex_lock(mutex); \
+   unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
+   unsigned long long elapsedTime = (afterTime-beforeTime); \
+   if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
+      DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+               elapsedTime, #mutex); \
+  } \
+} else pthread_mutex_lock(mutex);
+
+#else
+
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#  define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
+#  define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* =====   Buffer Pool   ===== */
+
+typedef struct buffer_s {
+    void* start;
+    size_t size;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+    unsigned totalBuffers;
+    unsigned nbBuffers;
+    buffer_t bTable[1];   /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
+{
+    unsigned const maxNbBuffers = 2*nbThreads + 2;
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
+    if (bufPool==NULL) return NULL;
+    bufPool->totalBuffers = maxNbBuffers;
+    bufPool->nbBuffers = 0;
+    return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    unsigned u;
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    for (u=0; u<bufPool->totalBuffers; u++)
+        free(bufPool->bTable[u].start);
+    free(bufPool);
+}
+
+/* assumption : invocation from main thread only ! */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
+{
+    if (pool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
+        size_t const availBufferSize = buf.size;
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))   /* large enough, but not too much */
+            return buf;
+        free(buf.start);   /* size conditions not respected : scratch this buffer and create a new one */
+    }
+    /* create new buffer */
+    {   buffer_t buffer;
+        void* const start = malloc(bSize);
+        if (start==NULL) bSize = 0;
+        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
+        buffer.size = bSize;
+        return buffer;
+    }
+}
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
+{
+    if (buf.start == NULL) return;   /* release on NULL */
+    if (pool->nbBuffers < pool->totalBuffers) {
+        pool->bTable[pool->nbBuffers++] = buf;   /* store for later re-use */
+        return;
+    }
+    /* Reached bufferPool capacity (should not happen) */
+    free(buf.start);
+}
+
+
+/* =====   CCtx Pool   ===== */
+
+typedef struct {
+    unsigned totalCCtx;
+    unsigned availCCtx;
+    ZSTD_CCtx* cctx[1];   /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* assumption : CCtxPool invocation only from main thread */
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+    unsigned u;
+    for (u=0; u<pool->totalCCtx; u++)
+        ZSTD_freeCCtx(pool->cctx[u]);  /* note : compatible with free on NULL */
+    free(pool);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
+{
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
+    if (!cctxPool) return NULL;
+    cctxPool->totalCCtx = nbThreads;
+    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
+    cctxPool->cctx[0] = ZSTD_createCCtx();
+    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+    DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
+    return cctxPool;
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
+{
+    if (pool->availCCtx) {
+        pool->availCCtx--;
+        return pool->cctx[pool->availCCtx];
+    }
+    return ZSTD_createCCtx();   /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return;   /* compatibility with release on NULL */
+    if (pool->availCCtx < pool->totalCCtx)
+        pool->cctx[pool->availCCtx++] = cctx;
+    else
+        /* pool overflow : should not happen, since totalCCtx==nbThreads */
+        ZSTD_freeCCtx(cctx);
+}
+
+
+/* =====   Thread worker   ===== */
+
+typedef struct {
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
+typedef struct {
+    ZSTD_CCtx* cctx;
+    buffer_t src;
+    const void* srcStart;
+    size_t   srcSize;
+    size_t   dictSize;
+    buffer_t dstBuff;
+    size_t   cSize;
+    size_t   dstFlushed;
+    unsigned firstChunk;
+    unsigned lastChunk;
+    unsigned jobCompleted;
+    unsigned jobScanned;
+    pthread_mutex_t* jobCompleted_mutex;
+    pthread_cond_t* jobCompleted_cond;
+    ZSTD_parameters params;
+    ZSTD_CDict* cdict;
+    unsigned long long fullFrameSize;
+} ZSTDMT_jobDescription;
+
+/* ZSTDMT_compressChunk() : POOL_function type */
+void ZSTDMT_compressChunk(void* jobDescription)
+{
+    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    const void* const src = (const char*)job->srcStart + job->dictSize;
+    buffer_t const dstBuff = job->dstBuff;
+    DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+    if (job->cdict) {
+        size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
+        if (job->cdict) DEBUGLOG(3, "using CDict ");
+        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+    } else {
+        size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
+        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+        ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
+    }
+    if (!job->firstChunk) {  /* flush frame header */
+        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
+        if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
+        ZSTD_invalidateRepCodes(job->cctx);
+    }
+
+    DEBUGLOG(4, "Compressing : ");
+    DEBUG_PRINTHEX(4, job->srcStart, 12);
+    job->cSize = (job->lastChunk) ?   /* last chunk signal */
+                 ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
+                 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
+    DEBUGLOG(3, "compressed %u bytes into %u bytes   (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
+
+_endJob:
+    PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
+    job->jobCompleted = 1;
+    job->jobScanned = 0;
+    pthread_cond_signal(job->jobCompleted_cond);
+    pthread_mutex_unlock(job->jobCompleted_mutex);
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+struct ZSTDMT_CCtx_s {
+    POOL_ctx* factory;
+    ZSTDMT_bufferPool* buffPool;
+    ZSTDMT_CCtxPool* cctxPool;
+    pthread_mutex_t jobCompleted_mutex;
+    pthread_cond_t jobCompleted_cond;
+    size_t targetSectionSize;
+    size_t marginSize;
+    size_t inBuffSize;
+    size_t dictSize;
+    size_t targetDictSize;
+    inBuff_t inBuff;
+    ZSTD_parameters params;
+    XXH64_state_t xxhState;
+    unsigned nbThreads;
+    unsigned jobIDMask;
+    unsigned doneJobID;
+    unsigned nextJobID;
+    unsigned frameEnded;
+    unsigned allJobsCompleted;
+    unsigned overlapRLog;
+    unsigned long long frameContentSize;
+    size_t sectionSize;
+    ZSTD_CDict* cdict;
+    ZSTD_CStream* cstream;
+    ZSTDMT_jobDescription jobs[1];   /* variable size (must lies at the end) */
+};
+
+ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
+{
+    ZSTDMT_CCtx* cctx;
+    U32 const minNbJobs = nbThreads + 2;
+    U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
+    U32 const nbJobs = 1 << nbJobsLog2;
+    DEBUGLOG(5, "nbThreads : %u  ; minNbJobs : %u ;  nbJobsLog2 : %u ;  nbJobs : %u  \n",
+            nbThreads, minNbJobs, nbJobsLog2, nbJobs);
+    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
+    cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
+    if (!cctx) return NULL;
+    cctx->nbThreads = nbThreads;
+    cctx->jobIDMask = nbJobs - 1;
+    cctx->allJobsCompleted = 1;
+    cctx->sectionSize = 0;
+    cctx->overlapRLog = 3;
+    cctx->factory = POOL_create(nbThreads, 1);
+    cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
+    cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
+    if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) {  /* one object was not created */
+        ZSTDMT_freeCCtx(cctx);
+        return NULL;
+    }
+    if (nbThreads==1) {
+        cctx->cstream = ZSTD_createCStream();
+        if (!cctx->cstream) {
+            ZSTDMT_freeCCtx(cctx); return NULL;
+    }   }
+    pthread_mutex_init(&cctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
+    pthread_cond_init(&cctx->jobCompleted_cond, NULL);
+    DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
+    return cctx;
+}
+
+/* ZSTDMT_releaseAllJobResources() :
+ * Ensure all workers are killed first. */
+static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
+{
+    unsigned jobID;
+    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
+        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
+        mtctx->jobs[jobID].src = g_nullBuffer;
+        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
+        mtctx->jobs[jobID].cctx = NULL;
+    }
+    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
+    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
+    mtctx->inBuff.buffer = g_nullBuffer;
+    mtctx->allJobsCompleted = 1;
+}
+
+size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
+    POOL_free(mtctx->factory);
+    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
+    ZSTDMT_freeBufferPool(mtctx->buffPool);  /* release job resources into pools first */
+    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+    ZSTD_freeCDict(mtctx->cdict);
+    ZSTD_freeCStream(mtctx->cstream);
+    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
+    pthread_cond_destroy(&mtctx->jobCompleted_cond);
+    free(mtctx);
+    return 0;
+}
+
+size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
+{
+    switch(parameter)
+    {
+    case ZSTDMT_p_sectionSize :
+        mtctx->sectionSize = value;
+        return 0;
+    case ZSTDMT_p_overlapSectionLog :
+    DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
+        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
+        return 0;
+    default :
+        return ERROR(compressionParameter_unsupported);
+    }
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                           int compressionLevel)
+{
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
+    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
+    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
+    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
+    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
+    size_t remainingSrcSize = srcSize;
+    const char* const srcStart = (const char*)src;
+    size_t frameStartPos = 0;
+
+    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes  ", params.cParams.windowLog, (U32)chunkTargetSize);
+    DEBUGLOG(2, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
+    params.fParams.contentSizeFlag = 1;
+
+    if (nbChunks==1) {   /* fallback to single-thread mode */
+        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+    }
+
+    {   unsigned u;
+        for (u=0; u<nbChunks; u++) {
+            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
+            size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
+            buffer_t const dstAsBuffer = { dst, dstCapacity };
+            buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
+            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+
+            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
+                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
+                mtctx->jobs[u].jobCompleted = 1;
+                nbChunks = u+1;
+                break;   /* let's wait for previous jobs to complete, but don't start new ones */
+            }
+
+            mtctx->jobs[u].srcStart = srcStart + frameStartPos;
+            mtctx->jobs[u].srcSize = chunkSize;
+            mtctx->jobs[u].fullFrameSize = srcSize;
+            mtctx->jobs[u].params = params;
+            mtctx->jobs[u].dstBuff = dstBuffer;
+            mtctx->jobs[u].cctx = cctx;
+            mtctx->jobs[u].firstChunk = (u==0);
+            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
+            mtctx->jobs[u].jobCompleted = 0;
+            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
+            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
+
+            DEBUGLOG(3, "posting job %u   (%u bytes)", u, (U32)chunkSize);
+            DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
+            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+
+            frameStartPos += chunkSize;
+            remainingSrcSize -= chunkSize;
+    }   }
+    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
+
+    {   unsigned chunkID;
+        size_t error = 0, dstPos = 0;
+        for (chunkID=0; chunkID<nbChunks; chunkID++) {
+            DEBUGLOG(3, "waiting for chunk %u ", chunkID);
+            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
+            while (mtctx->jobs[chunkID].jobCompleted==0) {
+                DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
+                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+            }
+            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
+            DEBUGLOG(3, "ready to write chunk %u ", chunkID);
+
+            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
+            mtctx->jobs[chunkID].cctx = NULL;
+            mtctx->jobs[chunkID].srcStart = NULL;
+            {   size_t const cSize = mtctx->jobs[chunkID].cSize;
+                if (ZSTD_isError(cSize)) error = cSize;
+                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
+                if (chunkID) {   /* note : chunk 0 is already written directly into dst */
+                    if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
+                    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+                }
+                dstPos += cSize ;
+            }
+        }
+        if (!error) DEBUGLOG(3, "compressed size : %u  ", (U32)dstPos);
+        return error ? error : dstPos;
+    }
+
+}
+
+
+/* ====================================== */
+/* =======      Streaming API     ======= */
+/* ====================================== */
+
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
+    while (zcs->doneJobID < zcs->nextJobID) {
+        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
+        PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+        while (zcs->jobs[jobID].jobCompleted==0) {
+            DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
+            pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+        }
+        pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+        zcs->doneJobID++;
+    }
+}
+
+
+static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                                    const void* dict, size_t dictSize, unsigned updateDict,
+                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_customMem const cmem = { NULL, NULL, NULL };
+    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
+    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
+    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        zcs->allJobsCompleted = 1;
+    }
+    zcs->params = params;
+    if (updateDict) {
+        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
+        if (dict && dictSize) {
+            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
+            if (zcs->cdict == NULL) return ERROR(memory_allocation);
+    }   }
+    zcs->frameContentSize = pledgedSrcSize;
+    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
+    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
+    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
+    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
+    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
+    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
+    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
+    zcs->marginSize = zcs->targetSectionSize >> 2;
+    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
+    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
+    zcs->inBuff.filled = 0;
+    zcs->dictSize = 0;
+    zcs->doneJobID = 0;
+    zcs->nextJobID = 0;
+    zcs->frameEnded = 0;
+    zcs->allJobsCompleted = 0;
+    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+    return 0;
+}
+
+size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
+                                const void* dict, size_t dictSize,
+                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
+}
+
+/* ZSTDMT_resetCStream() :
+ * pledgedSrcSize is optional and can be zero == unknown */
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+{
+    if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
+}
+
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
+}
+
+
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
+{
+    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
+    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
+    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
+    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
+
+    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
+        zcs->jobs[jobID].jobCompleted = 1;
+        zcs->nextJobID++;
+        ZSTDMT_waitForAllJobsCompleted(zcs);
+        ZSTDMT_releaseAllJobResources(zcs);
+        return ERROR(memory_allocation);
+    }
+
+    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
+    zcs->jobs[jobID].src = zcs->inBuff.buffer;
+    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
+    zcs->jobs[jobID].srcSize = srcSize;
+    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
+    zcs->jobs[jobID].params = zcs->params;
+    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;  /* do not calculate checksum within sections, just keep it in header for first section */
+    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
+    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
+    zcs->jobs[jobID].dstBuff = dstBuffer;
+    zcs->jobs[jobID].cctx = cctx;
+    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
+    zcs->jobs[jobID].lastChunk = endFrame;
+    zcs->jobs[jobID].jobCompleted = 0;
+    zcs->jobs[jobID].dstFlushed = 0;
+    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
+    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
+
+    /* get a new buffer for next input */
+    if (!endFrame) {
+        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
+        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+            zcs->jobs[jobID].jobCompleted = 1;
+            zcs->nextJobID++;
+            ZSTDMT_waitForAllJobsCompleted(zcs);
+            ZSTDMT_releaseAllJobResources(zcs);
+            return ERROR(memory_allocation);
+        }
+        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
+        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
+        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
+        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
+        DEBUGLOG(5, "new inBuff pre-filled");
+        zcs->dictSize = newDictSize;
+    } else {
+        zcs->inBuff.buffer = g_nullBuffer;
+        zcs->inBuff.filled = 0;
+        zcs->dictSize = 0;
+        zcs->frameEnded = 1;
+        if (zcs->nextJobID == 0)
+            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
+    }
+
+    DEBUGLOG(3, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
+    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
+    zcs->nextJobID++;
+    return 0;
+}
+
+
+/* ZSTDMT_flushNextJob() :
+ * output : will be updated with amount of data flushed .
+ * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
+ * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
+static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+{
+    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
+    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
+    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+    while (zcs->jobs[wJobID].jobCompleted==0) {
+        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
+        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
+        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush */
+    }
+    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+    /* compression job completed : output can be flushed */
+    {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];
+        if (!job.jobScanned) {
+            if (ZSTD_isError(job.cSize)) {
+                DEBUGLOG(5, "compression error detected ");
+                ZSTDMT_waitForAllJobsCompleted(zcs);
+                ZSTDMT_releaseAllJobResources(zcs);
+                return job.cSize;
+            }
+            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
+            zcs->jobs[wJobID].cctx = NULL;
+            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
+            if (zcs->params.fParams.checksumFlag) {
+                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
+                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {  /* write checksum at end of last section */
+                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
+                    DEBUGLOG(4, "writing checksum : %08X \n", checksum);
+                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
+                    job.cSize += 4;
+                    zcs->jobs[wJobID].cSize += 4;
+            }   }
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
+            zcs->jobs[wJobID].srcStart = NULL;
+            zcs->jobs[wJobID].src = g_nullBuffer;
+            zcs->jobs[wJobID].jobScanned = 1;
+        }
+        {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
+            DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
+            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
+            output->pos += toWrite;
+            job.dstFlushed += toWrite;
+        }
+        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
+            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
+            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
+            zcs->jobs[wJobID].jobCompleted = 0;
+            zcs->doneJobID++;
+        } else {
+            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
+        }
+        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
+        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
+        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
+        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
+        return 0;   /* everything flushed */
+}   }
+
+
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
+    if (zcs->frameEnded) return ERROR(stage_wrong);   /* current frame being ended. Only flush is allowed. Restart with init */
+    if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
+
+    /* fill input buffer */
+    {   size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
+        memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
+        input->pos += toLoad;
+        zcs->inBuff.filled += toLoad;
+    }
+
+    if ( (zcs->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
+        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {   /* avoid overwriting job round buffer */
+        CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
+    }
+
+    /* check for data to flush */
+    CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
+
+    /* recommended next input size : fill current input buffer */
+    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+}
+
+
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
+{
+    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
+
+    if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
+    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
+       && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
+        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
+    }
+
+    /* check if there is any data available to flush */
+    DEBUGLOG(5, "zcs->doneJobID : %u  ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
+    return ZSTDMT_flushNextJob(zcs, output, 1);
+}
+
+
+size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{
+    if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
+    return ZSTDMT_flushStream_internal(zcs, output, 0);
+}
+
+size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
+{
+    if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
+    return ZSTDMT_flushStream_internal(zcs, output, 1);
+}
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/compress/zstdmt_compress.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/compress/zstdmt_compress.h	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,78 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+/* Note : All prototypes defined in this file shall be considered experimental.
+ *        There is no guarantee of API continuity (yet) on any of these prototypes */
+
+/* ===   Dependencies   === */
+#include <stddef.h>   /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#include "zstd.h"     /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+/* ===   Simple one-pass functions   === */
+
+typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
+ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);
+
+ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
+                           void* dst, size_t dstCapacity,
+                     const void* src, size_t srcSize,
+                           int compressionLevel);
+
+
+/* ===   Streaming functions   === */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);    /**< pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+/* ===   Advanced functions and parameters  === */
+
+#ifndef ZSTDMT_SECTION_SIZE_MIN
+#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
+#endif
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize,  /**< dict can be released after init, a local copy is preserved within zcs */
+                                          ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
+
+/* ZSDTMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
+typedef enum {
+    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
+    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+} ZSDTMT_parameter;
+
+/* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
+ * The function must be called typically after ZSTD_createCCtx().
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTDMT_COMPRESS_H */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/decompress/zstd_decompress.c
--- a/contrib/python-zstandard/zstd/decompress/zstd_decompress.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/decompress/zstd_decompress.c	Mon Feb 13 09:44:16 2017 -0800
@@ -1444,7 +1444,7 @@
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
     if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
 #endif
-    ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
+    CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
     ZSTD_checkContinuity(dctx, dst);
     return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
 }
@@ -1671,9 +1671,9 @@
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
     dctx->litEntropy = dctx->fseEntropy = 1;
@@ -1713,39 +1713,44 @@
 /* ======   ZSTD_DDict   ====== */
 
 struct ZSTD_DDict_s {
-    void* dict;
+    void* dictBuffer;
+    const void* dictContent;
     size_t dictSize;
     ZSTD_DCtx* refContext;
 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
 
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
 {
     if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
     if (!customMem.customAlloc || !customMem.customFree) return NULL;
 
     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
         ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
 
-        if (!dictContent || !ddict || !dctx) {
-            ZSTD_free(dictContent, customMem);
+        if (!ddict || !dctx) {
             ZSTD_free(ddict, customMem);
             ZSTD_free(dctx, customMem);
             return NULL;
         }
 
-        if (dictSize) {
-            memcpy(dictContent, dict, dictSize);
+        if ((byReference) || (!dict) || (!dictSize)) {
+            ddict->dictBuffer = NULL;
+            ddict->dictContent = dict;
+        } else {
+            void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
+            if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
+            memcpy(internalBuffer, dict, dictSize);
+            ddict->dictBuffer = internalBuffer;
+            ddict->dictContent = internalBuffer;
         }
-        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
+        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
             if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
+                ZSTD_free(ddict->dictBuffer, customMem);
                 ZSTD_free(ddict, customMem);
                 ZSTD_free(dctx, customMem);
                 return NULL;
         }   }
 
-        ddict->dict = dictContent;
         ddict->dictSize = dictSize;
         ddict->refContext = dctx;
         return ddict;
@@ -1758,15 +1763,27 @@
 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
 {
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dict, dictSize, allocator);
+    return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
 }
 
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
+}
+
+
 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = ddict->refContext->customMem;
         ZSTD_freeDCtx(ddict->refContext);
-        ZSTD_free(ddict->dict, cMem);
+        ZSTD_free(ddict->dictBuffer, cMem);
         ZSTD_free(ddict, cMem);
         return 0;
     }
@@ -1775,7 +1792,7 @@
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
+    return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
 }
 
 /*! ZSTD_getDictID_fromDict() :
@@ -1796,7 +1813,7 @@
 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;
-    return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize);
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
 }
 
 /*! ZSTD_getDictID_fromFrame() :
@@ -1827,7 +1844,7 @@
                             const ZSTD_DDict* ddict)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize);
 #endif
     ZSTD_refDCtx(dctx, ddict->refContext);
     ZSTD_checkContinuity(dctx, dst);
@@ -1919,7 +1936,7 @@
     zds->stage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     ZSTD_freeDDict(zds->ddictLocal);
-    if (dict) {
+    if (dict && dictSize >= 8) {
         zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
         if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
     } else zds->ddictLocal = NULL;
@@ -1956,7 +1973,7 @@
     switch(paramType)
     {
         default : return ERROR(parameter_unknown);
-        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
+        case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
 }
@@ -2007,7 +2024,7 @@
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
                 {   U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
                     if (legacyVersion) {
-                        const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
+                        const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
                         size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
                         CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
                                                        dict, dictSize));
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/dictBuilder/cover.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/zstd/dictBuilder/cover.c	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,1021 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#include "mem.h" /* read */
+#include "pool.h"
+#include "threading.h"
+#include "zstd_internal.h" /* includes zstd.h */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+*  Constants
+***************************************/
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
+
+/*-*************************************
+*  Console display
+***************************************/
+static int g_displayLevel = 2;
+#define DISPLAY(...)                                                           \
+  {                                                                            \
+    fprintf(stderr, __VA_ARGS__);                                              \
+    fflush(stderr);                                                            \
+  }
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
+  if (displayLevel >= l) {                                                     \
+    DISPLAY(__VA_ARGS__);                                                      \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
+  if (displayLevel >= l) {                                                     \
+    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+      g_time = clock();                                                        \
+      DISPLAY(__VA_ARGS__);                                                    \
+      if (displayLevel >= 4)                                                   \
+        fflush(stdout);                                                        \
+    }                                                                          \
+  }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+
+/*-*************************************
+* Hash table
+***************************************
+* A small specialized hash map for storing activeDmers.
+* The map does not resize, so if it becomes full it will loop forever.
+* Thus, the map must be large enough to store every value.
+* The map implements linear probing and keeps its load less than 0.5.
+*/
+
+#define MAP_EMPTY_VALUE ((U32)-1)
+typedef struct COVER_map_pair_t_s {
+  U32 key;
+  U32 value;
+} COVER_map_pair_t;
+
+typedef struct COVER_map_s {
+  COVER_map_pair_t *data;
+  U32 sizeLog;
+  U32 size;
+  U32 sizeMask;
+} COVER_map_t;
+
+/**
+ * Clear the map.
+ */
+static void COVER_map_clear(COVER_map_t *map) {
+  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
+}
+
+/**
+ * Initializes a map of the given size.
+ * Returns 1 on success and 0 on failure.
+ * The map must be destroyed with COVER_map_destroy().
+ * The map is only guaranteed to be large enough to hold size elements.
+ */
+static int COVER_map_init(COVER_map_t *map, U32 size) {
+  map->sizeLog = ZSTD_highbit32(size) + 2;
+  map->size = (U32)1 << map->sizeLog;
+  map->sizeMask = map->size - 1;
+  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
+  if (!map->data) {
+    map->sizeLog = 0;
+    map->size = 0;
+    return 0;
+  }
+  COVER_map_clear(map);
+  return 1;
+}
+
+/**
+ * Internal hash function
+ */
+static const U32 prime4bytes = 2654435761U;
+static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
+  return (key * prime4bytes) >> (32 - map->sizeLog);
+}
+
+/**
+ * Helper function that returns the index that a key should be placed into.
+ */
+static U32 COVER_map_index(COVER_map_t *map, U32 key) {
+  const U32 hash = COVER_map_hash(map, key);
+  U32 i;
+  for (i = hash;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *pos = &map->data[i];
+    if (pos->value == MAP_EMPTY_VALUE) {
+      return i;
+    }
+    if (pos->key == key) {
+      return i;
+    }
+  }
+}
+
+/**
+ * Returns the pointer to the value for key.
+ * If key is not in the map, it is inserted and the value is set to 0.
+ * The map must not be full.
+ */
+static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
+  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
+  if (pos->value == MAP_EMPTY_VALUE) {
+    pos->key = key;
+    pos->value = 0;
+  }
+  return &pos->value;
+}
+
+/**
+ * Deletes key from the map if present.
+ */
+static void COVER_map_remove(COVER_map_t *map, U32 key) {
+  U32 i = COVER_map_index(map, key);
+  COVER_map_pair_t *del = &map->data[i];
+  U32 shift = 1;
+  if (del->value == MAP_EMPTY_VALUE) {
+    return;
+  }
+  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *const pos = &map->data[i];
+    /* If the position is empty we are done */
+    if (pos->value == MAP_EMPTY_VALUE) {
+      del->value = MAP_EMPTY_VALUE;
+      return;
+    }
+    /* If pos can be moved to del do so */
+    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
+      del->key = pos->key;
+      del->value = pos->value;
+      del = pos;
+      shift = 1;
+    } else {
+      ++shift;
+    }
+  }
+}
+
+/**
+ * Destroyes a map that is inited with COVER_map_init().
+ */
+static void COVER_map_destroy(COVER_map_t *map) {
+  if (map->data) {
+    free(map->data);
+  }
+  map->data = NULL;
+  map->size = 0;
+}
+
+/*-*************************************
+* Context
+***************************************/
+
+typedef struct {
+  const BYTE *samples;
+  size_t *offsets;
+  const size_t *samplesSizes;
+  size_t nbSamples;
+  U32 *suffix;
+  size_t suffixSize;
+  U32 *freqs;
+  U32 *dmerAt;
+  unsigned d;
+} COVER_ctx_t;
+
+/* We need a global context for qsort... */
+static COVER_ctx_t *g_ctx = NULL;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+
+/**
+ * Returns the sum of the sample sizes.
+ */
+static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
+  size_t sum = 0;
+  size_t i;
+  for (i = 0; i < nbSamples; ++i) {
+    sum += samplesSizes[i];
+  }
+  return sum;
+}
+
+/**
+ * Returns -1 if the dmer at lp is less than the dmer at rp.
+ * Return 0 if the dmers at lp and rp are equal.
+ * Returns 1 if the dmer at lp is greater than the dmer at rp.
+ */
+static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  const U32 lhs = *(const U32 *)lp;
+  const U32 rhs = *(const U32 *)rp;
+  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
+}
+
+/**
+ * Same as COVER_cmp() except ties are broken by pointer value
+ * NOTE: g_ctx must be set to call this function.  A global is required because
+ * qsort doesn't take an opaque pointer.
+ */
+static int COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
+
+/**
+ * Returns the first pointer in [first, last) whose element does not compare
+ * less than value.  If no such element exists it returns last.
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  size_t count = last - first;
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;
+  U32 end;
+  double score;
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments of are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionay we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end, COVER_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggest using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ */
+static int COVER_checkParameters(COVER_params_t parameters) {
+  /* k and d are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+  if (!ctx) {
+    return;
+  }
+  if (ctx->suffix) {
+    free(ctx->suffix);
+    ctx->suffix = NULL;
+  }
+  if (ctx->freqs) {
+    free(ctx->freqs);
+    ctx->freqs = NULL;
+  }
+  if (ctx->dmerAt) {
+    free(ctx->dmerAt);
+    ctx->dmerAt = NULL;
+  }
+  if (ctx->offsets) {
+    free(ctx->offsets);
+    ctx->offsets = NULL;
+  }
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can used multiple
+ * times.
+ * Returns 1 on success or zero on error.
+ * The context must be destroyed with `COVER_ctx_destroy()`.
+ */
+static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+                          const size_t *samplesSizes, unsigned nbSamples,
+                          unsigned d) {
+  const BYTE *const samples = (const BYTE *)samplesBuffer;
+  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+  /* Checks */
+  if (totalSamplesSize < d ||
+      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    return 0;
+  }
+  /* Zero the context */
+  memset(ctx, 0, sizeof(*ctx));
+  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
+               (U32)totalSamplesSize);
+  ctx->samples = samples;
+  ctx->samplesSizes = samplesSizes;
+  ctx->nbSamples = nbSamples;
+  /* Partial suffix array */
+  ctx->suffixSize = totalSamplesSize - d + 1;
+  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* Maps index to the dmerID */
+  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* The offsets of each file */
+  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
+  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
+    COVER_ctx_destroy(ctx);
+    return 0;
+  }
+  ctx->freqs = NULL;
+  ctx->d = d;
+
+  /* Fill offsets from the samlesSizes */
+  {
+    U32 i;
+    ctx->offsets[0] = 0;
+    for (i = 1; i <= nbSamples; ++i) {
+      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+    }
+  }
+  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
+  {
+    /* suffix is a partial suffix array.
+     * It only sorts suffixes by their first parameters.d bytes.
+     * The sort is stable, so each dmer group is sorted by position in input.
+     */
+    U32 i;
+    for (i = 0; i < ctx->suffixSize; ++i) {
+      ctx->suffix[i] = i;
+    }
+    /* qsort doesn't take an opaque pointer, so pass as a global */
+    g_ctx = ctx;
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+  }
+  DISPLAYLEVEL(2, "Computing frequencies\n");
+  /* For each dmer group (group of positions with the same first d bytes):
+   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
+   *    (groupBeginPtr - suffix).  This allows us to go from position to
+   *    dmerID so we can look up values in freq.
+   * 2. We calculate how many samples the dmer occurs in and save it in
+   *    freqs[dmerId].
+   */
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
+                &COVER_group);
+  ctx->freqs = ctx->suffix;
+  ctx->suffix = NULL;
+  return 1;
+}
+
+/**
+ * Given the prepared context build the dictionary.
+ */
+static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
+                                    COVER_map_t *activeDmers, void *dictBuffer,
+                                    size_t dictBufferCapacity,
+                                    COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  size_t tail = dictBufferCapacity;
+  /* Divide the data up into epochs of equal size.
+   * We will select at least one segment from each epoch.
+   */
+  const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
+  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
+  size_t epoch;
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
+               epochSize);
+  /* Loop through the epochs until there are no more segments or the dictionary
+   * is full.
+   */
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
+    const U32 epochBegin = (U32)(epoch * epochSize);
+    const U32 epochEnd = epochBegin + epochSize;
+    size_t segmentSize;
+    /* Select a segment */
+    COVER_segment_t segment = COVER_selectSegment(
+        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
+    /* Trim the segment if necessary and if it is empty then we are done */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize == 0) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets.
+     */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%%       ",
+        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+/**
+ * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
+ * dictionary.
+ */
+static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
+  ZDICT_params_t zdictParams;
+  memset(&zdictParams, 0, sizeof(zdictParams));
+  zdictParams.notificationLevel = 1;
+  zdictParams.dictID = parameters.dictID;
+  zdictParams.compressionLevel = parameters.compressionLevel;
+  return zdictParams;
+}
+
+/**
+ * Constructs a dictionary using a heuristic based on the following paper:
+ *
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ */
+ZDICTLIB_API size_t COVER_trainFromBuffer(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  /* Checks */
+  if (!COVER_checkParameters(parameters)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialize global data */
+  g_displayLevel = parameters.notificationLevel;
+  /* Initialize context and activeDmers */
+  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d)) {
+    return ERROR(GENERIC);
+  }
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(GENERIC);
+  }
+
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  {
+    const size_t tail =
+        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
+                              dictBufferCapacity, parameters);
+    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    const size_t dictionarySize = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        samplesBuffer, samplesSizes, nbSamples, zdictParams);
+    if (!ZSTD_isError(dictionarySize)) {
+      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                   (U32)dictionarySize);
+    }
+    COVER_ctx_destroy(&ctx);
+    COVER_map_destroy(&activeDmers);
+    return dictionarySize;
+  }
+}
+
+/**
+ * COVER_best_t is used for two purposes:
+ * 1. Synchronizing threads.
+ * 2. Saving the best parameters and dictionary.
+ *
+ * All of the methods except COVER_best_init() are thread safe if zstd is
+ * compiled with multithreaded support.
+ */
+typedef struct COVER_best_s {
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+  size_t liveJobs;
+  void *dict;
+  size_t dictSize;
+  COVER_params_t parameters;
+  size_t compressedSize;
+} COVER_best_t;
+
+/**
+ * Initialize the `COVER_best_t`.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_init(&best->mutex, NULL);
+  pthread_cond_init(&best->cond, NULL);
+  best->liveJobs = 0;
+  best->dict = NULL;
+  best->dictSize = 0;
+  best->compressedSize = (size_t)-1;
+  memset(&best->parameters, 0, sizeof(best->parameters));
+}
+
+/**
+ * Wait until liveJobs == 0.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  while (best->liveJobs != 0) {
+    pthread_cond_wait(&best->cond, &best->mutex);
+  }
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Call COVER_best_wait() and then destroy the COVER_best_t.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  COVER_best_wait(best);
+  if (best->dict) {
+    free(best->dict);
+  }
+  pthread_mutex_destroy(&best->mutex);
+  pthread_cond_destroy(&best->cond);
+}
+
+/**
+ * Called when a thread is about to be launched.
+ * Increments liveJobs.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  ++best->liveJobs;
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Called when a thread finishes executing, both on error or success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far save it and its parameters.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+                              COVER_params_t parameters, void *dict,
+                              size_t dictSize) {
+  if (!best) {
+    return;
+  }
+  {
+    size_t liveJobs;
+    pthread_mutex_lock(&best->mutex);
+    --best->liveJobs;
+    liveJobs = best->liveJobs;
+    /* If the new dictionary is better */
+    if (compressedSize < best->compressedSize) {
+      /* Allocate space if necessary */
+      if (!best->dict || best->dictSize < dictSize) {
+        if (best->dict) {
+          free(best->dict);
+        }
+        best->dict = malloc(dictSize);
+        if (!best->dict) {
+          best->compressedSize = ERROR(GENERIC);
+          best->dictSize = 0;
+          return;
+        }
+      }
+      /* Save the dictionary, parameters, and size */
+      memcpy(best->dict, dict, dictSize);
+      best->dictSize = dictSize;
+      best->parameters = parameters;
+      best->compressedSize = compressedSize;
+    }
+    pthread_mutex_unlock(&best->mutex);
+    if (liveJobs == 0) {
+      pthread_cond_broadcast(&best->cond);
+    }
+  }
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ */
+typedef struct COVER_tryParameters_data_s {
+  const COVER_ctx_t *ctx;
+  COVER_best_t *best;
+  size_t dictBufferCapacity;
+  COVER_params_t parameters;
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and upates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const COVER_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict =
+        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    totalCompressedSize = 0;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  free(data);
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
+
+ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
+                                                  size_t dictBufferCapacity,
+                                                  const void *samplesBuffer,
+                                                  const size_t *samplesSizes,
+                                                  unsigned nbSamples,
+                                                  COVER_params_t *parameters) {
+  /* constants */
+  const unsigned nbThreads = parameters->nbThreads;
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  POOL_ctx *pool = NULL;
+  /* Checks */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  if (nbThreads > 1) {
+    pool = POOL_create(nbThreads, 1);
+    if (!pool) {
+      return ERROR(memory_allocation);
+    }
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = parameters->notificationLevel - 1;
+  /* Loop through d first because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      /* Check the parameters */
+      if (!COVER_checkParameters(data->parameters)) {
+        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      if (pool) {
+        POOL_add(pool, &COVER_tryParameters, data);
+      } else {
+        COVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best);
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      COVER_best_destroy(&best);
+      return best.compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+}
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/dictBuilder/zdict.c
--- a/contrib/python-zstandard/zstd/dictBuilder/zdict.c	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/dictBuilder/zdict.c	Mon Feb 13 09:44:16 2017 -0800
@@ -36,12 +36,11 @@
 #include <time.h>          /* clock */
 
 #include "mem.h"           /* read */
-#include "error_private.h"
 #include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
 #define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
+#include "huf.h"           /* HUF_buildCTable, HUF_writeCTable */
 #include "zstd_internal.h" /* includes zstd.h */
-#include "xxhash.h"
+#include "xxhash.h"        /* XXH64 */
 #include "divsufsort.h"
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #  define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 @@
 #define NOISELENGTH 32
 
 #define MINRATIO 4
-static const int g_compressionLevel_default = 5;
+static const int g_compressionLevel_default = 6;
 static const U32 g_selectivity_default = 9;
 static const size_t g_provision_entropySize = 200;
 static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 @@
         } while (length >=MINMATCHLENGTH);
 
         /* look backward */
-		length = MINMATCHLENGTH;
-		while ((length >= MINMATCHLENGTH) & (start > 0)) {
-			length = ZDICT_count(b + pos, b + suffix[start - 1]);
-			if (length >= LLIMIT) length = LLIMIT - 1;
-			lengthList[length]++;
-			if (length >= MINMATCHLENGTH) start--;
-		}
+        length = MINMATCHLENGTH;
+        while ((length >= MINMATCHLENGTH) & (start > 0)) {
+        	length = ZDICT_count(b + pos, b + suffix[start - 1]);
+        	if (length >= LLIMIT) length = LLIMIT - 1;
+        	lengthList[length]++;
+        	if (length >= MINMATCHLENGTH) start--;
+        }
 
         /* largest useful length */
         memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 @@
             if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
     }
     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
-    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
 
     if (cSize) {  /* if == 0; block is not compressible */
         const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 @@
 }
 
 
+
+size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                          const void* customDictContent, size_t dictContentSize,
+                          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                          ZDICT_params_t params)
+{
+    size_t hSize;
+#define HBUFFSIZE 256
+    BYTE header[HBUFFSIZE];
+    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
+
+    /* check conditions */
+    if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
+    if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
+    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
+
+    /* dictionary header */
+    MEM_writeLE32(header, ZSTD_DICT_MAGIC);
+    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
+        MEM_writeLE32(header+4, dictID);
+    }
+    hSize = 8;
+
+    /* entropy tables */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    DISPLAYLEVEL(2, "statistics ... \n");
+    {   size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
+                                  compressionLevel,
+                                  samplesBuffer, samplesSizes, nbSamples,
+                                  customDictContent, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize += eSize;
+    }
+
+    /* copy elements in final buffer ; note : src and dst buffer can overlap */
+    if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
+    {   size_t const dictSize = hSize + dictContentSize;
+        char* dictEnd = (char*)dictBuffer + dictSize;
+        memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
+        memcpy(dictBuffer, header, hSize);
+        return dictSize;
+    }
+}
+
+
 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
                                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                                  ZDICT_params_t params)
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/dictBuilder/zdict.h
--- a/contrib/python-zstandard/zstd/dictBuilder/zdict.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/dictBuilder/zdict.h	Mon Feb 13 09:44:16 2017 -0800
@@ -19,15 +19,18 @@
 #include <stddef.h>  /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZDICTLIB_API __declspec(dllexport)
+/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#  define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
 #else
-#  define ZDICTLIB_API
+#  define ZDICTLIB_VISIBILITY
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
 
@@ -79,27 +82,114 @@
               or an error code, which can be tested by ZDICT_isError().
     note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
 */
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                ZDICT_params_t parameters);
+
+/*! COVER_params_t :
+    For all values 0 means default.
+    kMin and d are the only required parameters.
+*/
+typedef struct {
+    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
+    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
+    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
+
+    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
+    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
+    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
+} COVER_params_t;
+
+
+/*! COVER_trainFromBuffer() :
+    Train a dictionary from an array of samples using the COVER algorithm.
+    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+    The resulting dictionary will be saved into `dictBuffer`.
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code, which can be tested with ZDICT_isError().
+    Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
+           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+*/
+ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                              COVER_params_t parameters);
+
+/*! COVER_optimizeTrainFromBuffer() :
+    The same requirements as above hold for all the parameters except `parameters`.
+    This function tries many parameter combinations and picks the best parameters.
+    `*parameters` is filled with the best parameters found, and the dictionary
+    constructed with those parameters is stored in `dictBuffer`.
+
+    All of the parameters d, k, steps are optional.
+    If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
+    if steps is zero it defaults to its default value.
+    If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
+
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code, which can be tested with ZDICT_isError().
+              On success `*parameters` contains the parameters selected.
+    Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
+*/
+ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                                     const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+                                     COVER_params_t *parameters);
+
+/*! ZDICT_finalizeDictionary() :
+
+    Given a custom content as a basis for dictionary, and a set of samples,
+    finalize dictionary by adding headers and statistics.
+
+    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
+    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+
+    dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
+    maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
+
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
+              or an error code, which can be tested by ZDICT_isError().
+    note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
+    note 2 : dictBuffer and customDictContent can overlap
+*/
+#define ZDICT_CONTENTSIZE_MIN 256
+#define ZDICT_DICTSIZE_MIN    512
+ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                                const void* customDictContent, size_t dictContentSize,
                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                 ZDICT_params_t parameters);
 
 
-/*! ZDICT_addEntropyTablesFromBuffer() :
-
-    Given a content-only dictionary (built using any 3rd party algorithm),
-    add entropy tables computed from an array of samples.
-    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
-    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
 
-    The input dictionary content must be stored *at the end* of `dictBuffer`.
-    Its size is `dictContentSize`.
-    The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
-    starting from its beginning.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
-*/
+/* Deprecation warnings */
+/* It is generally possible to disable deprecation warnings from compiler,
+   for example with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
+#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
+#  define ZDICT_DEPRECATED(message) ZDICTLIB_API   /* disable deprecation warnings */
+#else
+#  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
+#  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
+#  elif (ZDICT_GCC_VERSION >= 301)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
+#    define ZDICT_DEPRECATED(message) ZDICTLIB_API
+#  endif
+#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
+
+ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
+                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
 
 
 #endif   /* ZDICT_STATIC_LINKING_ONLY */
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd/zstd.h
--- a/contrib/python-zstandard/zstd/zstd.h	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd/zstd.h	Mon Feb 13 09:44:16 2017 -0800
@@ -20,13 +20,16 @@
 
 /* =====   ZSTDLIB_API : control library symbols visibility   ===== */
 #if defined(__GNUC__) && (__GNUC__ >= 4)
-#  define ZSTDLIB_API __attribute__ ((visibility ("default")))
-#elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport)
+#  define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#else
+#  define ZSTDLIB_VISIBILITY
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-#  define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define ZSTDLIB_API
+#  define ZSTDLIB_API ZSTDLIB_VISIBILITY
 #endif
 
 
@@ -53,7 +56,7 @@
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    1
-#define ZSTD_VERSION_RELEASE  2
+#define ZSTD_VERSION_RELEASE  3
 
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 @@
 *   When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
 *   ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
 *   ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
-*   `dict` can be released after ZSTD_CDict creation. */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+*   `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
 
 /*! ZSTD_freeCDict() :
 *   Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 @@
 
 /*! ZSTD_createDDict() :
 *   Create a digested dictionary, ready to start decompression operation without startup delay.
-*   `dict` can be released after creation. */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+*   dictBuffer can be released after DDict creation, as its content is copied inside DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 
 /*! ZSTD_freeDDict() :
 *   Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 @@
  * ***************************************************************************************/
 
 /* --- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
 
 #define ZSTD_WINDOWLOG_MAX_32  25
@@ -345,8 +348,9 @@
 #define ZSTD_TARGETLENGTH_MAX 999
 
 #define ZSTD_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
+#define ZSTD_FRAMEHEADERSIZE_MIN  6
 static const size_t ZSTD_frameHeaderSize_prefix = 5;
-static const size_t ZSTD_frameHeaderSize_min = 6;
+static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
 
@@ -365,9 +369,9 @@
 } ZSTD_compressionParameters;
 
 typedef struct {
-    unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
-    unsigned checksumFlag;    /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    unsigned noDictIDFlag;    /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    unsigned checksumFlag;    /**< 1: generate a 32-bits checksum at end of frame, for error detection */
+    unsigned noDictIDFlag;    /**< 1: no dictID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;
 
 typedef struct {
@@ -397,9 +401,23 @@
  *  Gives the amount of memory used by a given ZSTD_CCtx */
 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
 
+typedef enum {
+    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
+} ZSTD_CCtxParameter;
+/*! ZSTD_setCCtxParameter() :
+ *  Set advanced parameters, selected through enum ZSTD_CCtxParameter
+ *  @result : 0, or an error code (which can be tested with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
 /*! ZSTD_createCDict_advanced() :
  *  Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
                                                   ZSTD_parameters params, ZSTD_customMem customMem);
 
 /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 @@
  *  Gives the amount of memory used by a given ZSTD_DCtx */
 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
 
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                                  unsigned byReference, ZSTD_customMem customMem);
+
 /*! ZSTD_sizeof_DDict() :
  *  Gives the amount of memory used by a given ZSTD_DDict */
 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 @@
  *  Provides the dictID stored within dictionary.
  *  if @return == 0, the dictionary is not conformant with Zstandard specification.
  *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 
 /*! ZSTD_getDictID_fromDDict() :
  *  Provides the dictID of the dictionary loaded into `ddict`.
  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 
 /*! ZSTD_getDictID_fromFrame() :
  *  Provides the dictID required to decompressed the frame stored within `src`.
@@ -481,7 +508,7 @@
  *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
  *  - This is not a Zstandard frame.
  *  When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
 
 
 /********************************************************************
@@ -491,7 +518,7 @@
 /*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct */
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
                                              ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 @@
 
 
 /*=====   Advanced Streaming decompression functions  =====*/
-typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
+typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 @@
     In which case, it will "discard" the relevant memory section from its history.
 
   Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
-  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
-  Without last block mark, frames will be considered unfinished (broken) by decoders.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (corrupted) by decoders.
 
-  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
 */
 
 /*=====   Buffer-less streaming compression functions  =====*/
@@ -553,6 +580,7 @@
 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
diff -r f3807a135e43 -r 455677a7667f contrib/python-zstandard/zstd_cffi.py
--- a/contrib/python-zstandard/zstd_cffi.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/contrib/python-zstandard/zstd_cffi.py	Mon Feb 13 09:44:16 2017 -0800
@@ -8,145 +8,1035 @@
 
 from __future__ import absolute_import, unicode_literals
 
-import io
+import sys
 
 from _zstd_cffi import (
     ffi,
     lib,
 )
 
+if sys.version_info[0] == 2:
+    bytes_type = str
+    int_type = long
+else:
+    bytes_type = bytes
+    int_type = int
 
-_CSTREAM_IN_SIZE = lib.ZSTD_CStreamInSize()
-_CSTREAM_OUT_SIZE = lib.ZSTD_CStreamOutSize()
+
+COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
+COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
+DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
+DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
+
+new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
+
+
+MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
+MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
+FRAME_HEADER = b'\x28\xb5\x2f\xfd'
+ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
+
+WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
+WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
+CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
+CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
+HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
+HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
+HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
+SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
+SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
+SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
+SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
+TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
+TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
+
+STRATEGY_FAST = lib.ZSTD_fast
+STRATEGY_DFAST = lib.ZSTD_dfast
+STRATEGY_GREEDY = lib.ZSTD_greedy
+STRATEGY_LAZY = lib.ZSTD_lazy
+STRATEGY_LAZY2 = lib.ZSTD_lazy2
+STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
+STRATEGY_BTOPT = lib.ZSTD_btopt
+
+COMPRESSOBJ_FLUSH_FINISH = 0
+COMPRESSOBJ_FLUSH_BLOCK = 1
+
+
+class ZstdError(Exception):
+    pass
 
 
-class _ZstdCompressionWriter(object):
-    def __init__(self, cstream, writer):
-        self._cstream = cstream
+class CompressionParameters(object):
+    def __init__(self, window_log, chain_log, hash_log, search_log,
+                 search_length, target_length, strategy):
+        if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
+            raise ValueError('invalid window log value')
+
+        if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
+            raise ValueError('invalid chain log value')
+
+        if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
+            raise ValueError('invalid hash log value')
+
+        if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
+            raise ValueError('invalid search log value')
+
+        if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
+            raise ValueError('invalid search length value')
+
+        if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
+            raise ValueError('invalid target length value')
+
+        if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
+            raise ValueError('invalid strategy value')
+
+        self.window_log = window_log
+        self.chain_log = chain_log
+        self.hash_log = hash_log
+        self.search_log = search_log
+        self.search_length = search_length
+        self.target_length = target_length
+        self.strategy = strategy
+
+    def as_compression_parameters(self):
+        p = ffi.new('ZSTD_compressionParameters *')[0]
+        p.windowLog = self.window_log
+        p.chainLog = self.chain_log
+        p.hashLog = self.hash_log
+        p.searchLog = self.search_log
+        p.searchLength = self.search_length
+        p.targetLength = self.target_length
+        p.strategy = self.strategy
+
+        return p
+
+def get_compression_parameters(level, source_size=0, dict_size=0):
+    params = lib.ZSTD_getCParams(level, source_size, dict_size)
+    return CompressionParameters(window_log=params.windowLog,
+                                 chain_log=params.chainLog,
+                                 hash_log=params.hashLog,
+                                 search_log=params.searchLog,
+                                 search_length=params.searchLength,
+                                 target_length=params.targetLength,
+                                 strategy=params.strategy)
+
+
+def estimate_compression_context_size(params):
+    if not isinstance(params, CompressionParameters):
+        raise ValueError('argument must be a CompressionParameters')
+
+    cparams = params.as_compression_parameters()
+    return lib.ZSTD_estimateCCtxSize(cparams)
+
+
+def estimate_decompression_context_size():
+    return lib.ZSTD_estimateDCtxSize()
+
+
+class ZstdCompressionWriter(object):
+    def __init__(self, compressor, writer, source_size, write_size):
+        self._compressor = compressor
         self._writer = writer
+        self._source_size = source_size
+        self._write_size = write_size
+        self._entered = False
 
     def __enter__(self):
+        if self._entered:
+            raise ZstdError('cannot __enter__ multiple times')
+
+        self._cstream = self._compressor._get_cstream(self._source_size)
+        self._entered = True
         return self
 
     def __exit__(self, exc_type, exc_value, exc_tb):
+        self._entered = False
+
         if not exc_type and not exc_value and not exc_tb:
             out_buffer = ffi.new('ZSTD_outBuffer *')
-            out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
-            out_buffer.size = _CSTREAM_OUT_SIZE
+            dst_buffer = ffi.new('char[]', self._write_size)
+            out_buffer.dst = dst_buffer
+            out_buffer.size = self._write_size
             out_buffer.pos = 0
 
             while True:
-                res = lib.ZSTD_endStream(self._cstream, out_buffer)
-                if lib.ZSTD_isError(res):
-                    raise Exception('error ending compression stream: %s' % lib.ZSTD_getErrorName)
+                zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
+                if lib.ZSTD_isError(zresult):
+                    raise ZstdError('error ending compression stream: %s' %
+                                    ffi.string(lib.ZSTD_getErrorName(zresult)))
 
                 if out_buffer.pos:
-                    self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
+                    self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
                     out_buffer.pos = 0
 
-                if res == 0:
+                if zresult == 0:
                     break
 
+        self._cstream = None
+        self._compressor = None
+
         return False
 
+    def memory_size(self):
+        if not self._entered:
+            raise ZstdError('cannot determine size of an inactive compressor; '
+                            'call when a context manager is active')
+
+        return lib.ZSTD_sizeof_CStream(self._cstream)
+
     def write(self, data):
+        if not self._entered:
+            raise ZstdError('write() must be called from an active context '
+                            'manager')
+
+        total_write = 0
+
+        data_buffer = ffi.from_buffer(data)
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        in_buffer.src = data_buffer
+        in_buffer.size = len(data_buffer)
+        in_buffer.pos = 0
+
         out_buffer = ffi.new('ZSTD_outBuffer *')
-        out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
-        out_buffer.size = _CSTREAM_OUT_SIZE
+        dst_buffer = ffi.new('char[]', self._write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = self._write_size
+        out_buffer.pos = 0
+
+        while in_buffer.pos < in_buffer.size:
+            zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd compress error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if out_buffer.pos:
+                self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
+                total_write += out_buffer.pos
+                out_buffer.pos = 0
+
+        return total_write
+
+    def flush(self):
+        if not self._entered:
+            raise ZstdError('flush must be called from an active context manager')
+
+        total_write = 0
+
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+        dst_buffer = ffi.new('char[]', self._write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = self._write_size
         out_buffer.pos = 0
 
-        # TODO can we reuse existing memory?
-        in_buffer = ffi.new('ZSTD_inBuffer *')
-        in_buffer.src = ffi.new('char[]', data)
-        in_buffer.size = len(data)
-        in_buffer.pos = 0
-        while in_buffer.pos < in_buffer.size:
-            res = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
-            if lib.ZSTD_isError(res):
-                raise Exception('zstd compress error: %s' % lib.ZSTD_getErrorName(res))
+        while True:
+            zresult = lib.ZSTD_flushStream(self._cstream, out_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd compress error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if not out_buffer.pos:
+                break
+
+            self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
+            total_write += out_buffer.pos
+            out_buffer.pos = 0
+
+        return total_write
+
+
+class ZstdCompressionObj(object):
+    def compress(self, data):
+        if self._finished:
+            raise ZstdError('cannot call compress() after compressor finished')
+
+        data_buffer = ffi.from_buffer(data)
+        source = ffi.new('ZSTD_inBuffer *')
+        source.src = data_buffer
+        source.size = len(data_buffer)
+        source.pos = 0
+
+        chunks = []
+
+        while source.pos < len(data):
+            zresult = lib.ZSTD_compressStream(self._cstream, self._out, source)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd compress error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if self._out.pos:
+                chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
+                self._out.pos = 0
+
+        return b''.join(chunks)
 
-            if out_buffer.pos:
-                self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
-                out_buffer.pos = 0
+    def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
+        if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
+            raise ValueError('flush mode not recognized')
+
+        if self._finished:
+            raise ZstdError('compressor object already finished')
+
+        assert self._out.pos == 0
+
+        if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
+            zresult = lib.ZSTD_flushStream(self._cstream, self._out)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd compress error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            # Output buffer is guaranteed to hold full block.
+            assert zresult == 0
+
+            if self._out.pos:
+                result = ffi.buffer(self._out.dst, self._out.pos)[:]
+                self._out.pos = 0
+                return result
+            else:
+                return b''
+
+        assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
+        self._finished = True
+
+        chunks = []
+
+        while True:
+            zresult = lib.ZSTD_endStream(self._cstream, self._out)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('error ending compression stream: %s' %
+                                ffi.string(lib.ZSTD_getErroName(zresult)))
+
+            if self._out.pos:
+                chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
+                self._out.pos = 0
+
+            if not zresult:
+                break
+
+        # GC compression stream immediately.
+        self._cstream = None
+
+        return b''.join(chunks)
 
 
 class ZstdCompressor(object):
-    def __init__(self, level=3, dict_data=None, compression_params=None):
-        if dict_data:
-            raise Exception('dict_data not yet supported')
-        if compression_params:
-            raise Exception('compression_params not yet supported')
+    def __init__(self, level=3, dict_data=None, compression_params=None,
+                 write_checksum=False, write_content_size=False,
+                 write_dict_id=True):
+        if level < 1:
+            raise ValueError('level must be greater than 0')
+        elif level > lib.ZSTD_maxCLevel():
+            raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
 
         self._compression_level = level
+        self._dict_data = dict_data
+        self._cparams = compression_params
+        self._fparams = ffi.new('ZSTD_frameParameters *')[0]
+        self._fparams.checksumFlag = write_checksum
+        self._fparams.contentSizeFlag = write_content_size
+        self._fparams.noDictIDFlag = not write_dict_id
 
-    def compress(self, data):
-        # Just use the stream API for now.
-        output = io.BytesIO()
-        with self.write_to(output) as compressor:
-            compressor.write(data)
-        return output.getvalue()
+        cctx = lib.ZSTD_createCCtx()
+        if cctx == ffi.NULL:
+            raise MemoryError()
+
+        self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
+
+    def compress(self, data, allow_empty=False):
+        if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
+            raise ValueError('cannot write empty inputs when writing content sizes')
+
+        # TODO use a CDict for performance.
+        dict_data = ffi.NULL
+        dict_size = 0
+
+        if self._dict_data:
+            dict_data = self._dict_data.as_bytes()
+            dict_size = len(self._dict_data)
+
+        params = ffi.new('ZSTD_parameters *')[0]
+        if self._cparams:
+            params.cParams = self._cparams.as_compression_parameters()
+        else:
+            params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
+                                                 dict_size)
+        params.fParams = self._fparams
+
+        dest_size = lib.ZSTD_compressBound(len(data))
+        out = new_nonzero('char[]', dest_size)
 
-    def copy_stream(self, ifh, ofh):
-        cstream = self._get_cstream()
+        zresult = lib.ZSTD_compress_advanced(self._cctx,
+                                             ffi.addressof(out), dest_size,
+                                             data, len(data),
+                                             dict_data, dict_size,
+                                             params)
+
+        if lib.ZSTD_isError(zresult):
+            raise ZstdError('cannot compress: %s' %
+                            ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+        return ffi.buffer(out, zresult)[:]
+
+    def compressobj(self, size=0):
+        cstream = self._get_cstream(size)
+        cobj = ZstdCompressionObj()
+        cobj._cstream = cstream
+        cobj._out = ffi.new('ZSTD_outBuffer *')
+        cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
+        cobj._out.dst = cobj._dst_buffer
+        cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
+        cobj._out.pos = 0
+        cobj._compressor = self
+        cobj._finished = False
+
+        return cobj
+
+    def copy_stream(self, ifh, ofh, size=0,
+                    read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
+                    write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
+
+        if not hasattr(ifh, 'read'):
+            raise ValueError('first argument must have a read() method')
+        if not hasattr(ofh, 'write'):
+            raise ValueError('second argument must have a write() method')
+
+        cstream = self._get_cstream(size)
 
         in_buffer = ffi.new('ZSTD_inBuffer *')
         out_buffer = ffi.new('ZSTD_outBuffer *')
 
-        out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
-        out_buffer.size = _CSTREAM_OUT_SIZE
+        dst_buffer = ffi.new('char[]', write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = write_size
         out_buffer.pos = 0
 
         total_read, total_write = 0, 0
 
         while True:
-            data = ifh.read(_CSTREAM_IN_SIZE)
+            data = ifh.read(read_size)
             if not data:
                 break
 
-            total_read += len(data)
-
-            in_buffer.src = ffi.new('char[]', data)
-            in_buffer.size = len(data)
+            data_buffer = ffi.from_buffer(data)
+            total_read += len(data_buffer)
+            in_buffer.src = data_buffer
+            in_buffer.size = len(data_buffer)
             in_buffer.pos = 0
 
             while in_buffer.pos < in_buffer.size:
-                res = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
-                if lib.ZSTD_isError(res):
-                    raise Exception('zstd compress error: %s' %
-                                    lib.ZSTD_getErrorName(res))
+                zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
+                if lib.ZSTD_isError(zresult):
+                    raise ZstdError('zstd compress error: %s' %
+                                    ffi.string(lib.ZSTD_getErrorName(zresult)))
 
                 if out_buffer.pos:
                     ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
-                    total_write = out_buffer.pos
+                    total_write += out_buffer.pos
                     out_buffer.pos = 0
 
         # We've finished reading. Flush the compressor.
         while True:
-            res = lib.ZSTD_endStream(cstream, out_buffer)
-            if lib.ZSTD_isError(res):
-                raise Exception('error ending compression stream: %s' %
-                                lib.ZSTD_getErrorName(res))
+            zresult = lib.ZSTD_endStream(cstream, out_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('error ending compression stream: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
 
             if out_buffer.pos:
                 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
                 total_write += out_buffer.pos
                 out_buffer.pos = 0
 
-            if res == 0:
+            if zresult == 0:
                 break
 
         return total_read, total_write
 
-    def write_to(self, writer):
-        return _ZstdCompressionWriter(self._get_cstream(), writer)
+    def write_to(self, writer, size=0,
+                 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
+
+        if not hasattr(writer, 'write'):
+            raise ValueError('must pass an object with a write() method')
+
+        return ZstdCompressionWriter(self, writer, size, write_size)
+
+    def read_from(self, reader, size=0,
+                  read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
+                  write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
+        if hasattr(reader, 'read'):
+            have_read = True
+        elif hasattr(reader, '__getitem__'):
+            have_read = False
+            buffer_offset = 0
+            size = len(reader)
+        else:
+            raise ValueError('must pass an object with a read() method or '
+                             'conforms to buffer protocol')
+
+        cstream = self._get_cstream(size)
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+
+        in_buffer.src = ffi.NULL
+        in_buffer.size = 0
+        in_buffer.pos = 0
+
+        dst_buffer = ffi.new('char[]', write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = write_size
+        out_buffer.pos = 0
+
+        while True:
+            # We should never have output data sitting around after a previous
+            # iteration.
+            assert out_buffer.pos == 0
+
+            # Collect input data.
+            if have_read:
+                read_result = reader.read(read_size)
+            else:
+                remaining = len(reader) - buffer_offset
+                slice_size = min(remaining, read_size)
+                read_result = reader[buffer_offset:buffer_offset + slice_size]
+                buffer_offset += slice_size
 
-    def _get_cstream(self):
+            # No new input data. Break out of the read loop.
+            if not read_result:
+                break
+
+            # Feed all read data into the compressor and emit output until
+            # exhausted.
+            read_buffer = ffi.from_buffer(read_result)
+            in_buffer.src = read_buffer
+            in_buffer.size = len(read_buffer)
+            in_buffer.pos = 0
+
+            while in_buffer.pos < in_buffer.size:
+                zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
+                if lib.ZSTD_isError(zresult):
+                    raise ZstdError('zstd compress error: %s' %
+                                    ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+                if out_buffer.pos:
+                    data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
+                    out_buffer.pos = 0
+                    yield data
+
+            assert out_buffer.pos == 0
+
+            # And repeat the loop to collect more data.
+            continue
+
+        # If we get here, input is exhausted. End the stream and emit what
+        # remains.
+        while True:
+            assert out_buffer.pos == 0
+            zresult = lib.ZSTD_endStream(cstream, out_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('error ending compression stream: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if out_buffer.pos:
+                data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
+                out_buffer.pos = 0
+                yield data
+
+            if zresult == 0:
+                break
+
+    def _get_cstream(self, size):
         cstream = lib.ZSTD_createCStream()
+        if cstream == ffi.NULL:
+            raise MemoryError()
+
         cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
 
-        res = lib.ZSTD_initCStream(cstream, self._compression_level)
-        if lib.ZSTD_isError(res):
+        dict_data = ffi.NULL
+        dict_size = 0
+        if self._dict_data:
+            dict_data = self._dict_data.as_bytes()
+            dict_size = len(self._dict_data)
+
+        zparams = ffi.new('ZSTD_parameters *')[0]
+        if self._cparams:
+            zparams.cParams = self._cparams.as_compression_parameters()
+        else:
+            zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
+                                                  size, dict_size)
+        zparams.fParams = self._fparams
+
+        zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
+                                                zparams, size)
+        if lib.ZSTD_isError(zresult):
             raise Exception('cannot init CStream: %s' %
-                            lib.ZSTD_getErrorName(res))
+                            ffi.string(lib.ZSTD_getErrorName(zresult)))
 
         return cstream
+
+
+class FrameParameters(object):
+    def __init__(self, fparams):
+        self.content_size = fparams.frameContentSize
+        self.window_size = fparams.windowSize
+        self.dict_id = fparams.dictID
+        self.has_checksum = bool(fparams.checksumFlag)
+
+
+def get_frame_parameters(data):
+    if not isinstance(data, bytes_type):
+        raise TypeError('argument must be bytes')
+
+    params = ffi.new('ZSTD_frameParams *')
+
+    zresult = lib.ZSTD_getFrameParams(params, data, len(data))
+    if lib.ZSTD_isError(zresult):
+        raise ZstdError('cannot get frame parameters: %s' %
+                        ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+    if zresult:
+        raise ZstdError('not enough data for frame parameters; need %d bytes' %
+                        zresult)
+
+    return FrameParameters(params[0])
+
+
+class ZstdCompressionDict(object):
+    def __init__(self, data):
+        assert isinstance(data, bytes_type)
+        self._data = data
+
+    def __len__(self):
+        return len(self._data)
+
+    def dict_id(self):
+        return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
+
+    def as_bytes(self):
+        return self._data
+
+
+def train_dictionary(dict_size, samples, parameters=None):
+    if not isinstance(samples, list):
+        raise TypeError('samples must be a list')
+
+    total_size = sum(map(len, samples))
+
+    samples_buffer = new_nonzero('char[]', total_size)
+    sample_sizes = new_nonzero('size_t[]', len(samples))
+
+    offset = 0
+    for i, sample in enumerate(samples):
+        if not isinstance(sample, bytes_type):
+            raise ValueError('samples must be bytes')
+
+        l = len(sample)
+        ffi.memmove(samples_buffer + offset, sample, l)
+        offset += l
+        sample_sizes[i] = l
+
+    dict_data = new_nonzero('char[]', dict_size)
+
+    zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size,
+                                        ffi.addressof(samples_buffer),
+                                        ffi.addressof(sample_sizes, 0),
+                                        len(samples))
+    if lib.ZDICT_isError(zresult):
+        raise ZstdError('Cannot train dict: %s' %
+                        ffi.string(lib.ZDICT_getErrorName(zresult)))
+
+    return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
+
+
+class ZstdDecompressionObj(object):
+    def __init__(self, decompressor):
+        self._decompressor = decompressor
+        self._dstream = self._decompressor._get_dstream()
+        self._finished = False
+
+    def decompress(self, data):
+        if self._finished:
+            raise ZstdError('cannot use a decompressobj multiple times')
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+
+        data_buffer = ffi.from_buffer(data)
+        in_buffer.src = data_buffer
+        in_buffer.size = len(data_buffer)
+        in_buffer.pos = 0
+
+        dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = len(dst_buffer)
+        out_buffer.pos = 0
+
+        chunks = []
+
+        while in_buffer.pos < in_buffer.size:
+            zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd decompressor error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if zresult == 0:
+                self._finished = True
+                self._dstream = None
+                self._decompressor = None
+
+            if out_buffer.pos:
+                chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
+                out_buffer.pos = 0
+
+        return b''.join(chunks)
+
+
+class ZstdDecompressionWriter(object):
+    def __init__(self, decompressor, writer, write_size):
+        self._decompressor = decompressor
+        self._writer = writer
+        self._write_size = write_size
+        self._dstream = None
+        self._entered = False
+
+    def __enter__(self):
+        if self._entered:
+            raise ZstdError('cannot __enter__ multiple times')
+
+        self._dstream = self._decompressor._get_dstream()
+        self._entered = True
+
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self._entered = False
+        self._dstream = None
+
+    def memory_size(self):
+        if not self._dstream:
+            raise ZstdError('cannot determine size of inactive decompressor '
+                            'call when context manager is active')
+
+        return lib.ZSTD_sizeof_DStream(self._dstream)
+
+    def write(self, data):
+        if not self._entered:
+            raise ZstdError('write must be called from an active context manager')
+
+        total_write = 0
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+
+        data_buffer = ffi.from_buffer(data)
+        in_buffer.src = data_buffer
+        in_buffer.size = len(data_buffer)
+        in_buffer.pos = 0
+
+        dst_buffer = ffi.new('char[]', self._write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = len(dst_buffer)
+        out_buffer.pos = 0
+
+        while in_buffer.pos < in_buffer.size:
+            zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('zstd decompress error: %s' %
+                                ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+            if out_buffer.pos:
+                self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
+                total_write += out_buffer.pos
+                out_buffer.pos = 0
+
+        return total_write
+
+
+class ZstdDecompressor(object):
+    def __init__(self, dict_data=None):
+        self._dict_data = dict_data
+
+        dctx = lib.ZSTD_createDCtx()
+        if dctx == ffi.NULL:
+            raise MemoryError()
+
+        self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
+
+    @property
+    def _ddict(self):
+        if self._dict_data:
+            dict_data = self._dict_data.as_bytes()
+            dict_size = len(self._dict_data)
+
+            ddict = lib.ZSTD_createDDict(dict_data, dict_size)
+            if ddict == ffi.NULL:
+                raise ZstdError('could not create decompression dict')
+        else:
+            ddict = None
+
+        self.__dict__['_ddict'] = ddict
+        return ddict
+
+    def decompress(self, data, max_output_size=0):
+        data_buffer = ffi.from_buffer(data)
+
+        orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
+        dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
+        lib.ZSTD_copyDCtx(dctx, self._refdctx)
+
+        ddict = self._ddict
+
+        output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
+        if output_size:
+            result_buffer = ffi.new('char[]', output_size)
+            result_size = output_size
+        else:
+            if not max_output_size:
+                raise ZstdError('input data invalid or missing content size '
+                                'in frame header')
+
+            result_buffer = ffi.new('char[]', max_output_size)
+            result_size = max_output_size
+
+        if ddict:
+            zresult = lib.ZSTD_decompress_usingDDict(dctx,
+                                                     result_buffer, result_size,
+                                                     data_buffer, len(data_buffer),
+                                                     ddict)
+        else:
+            zresult = lib.ZSTD_decompressDCtx(dctx,
+                                              result_buffer, result_size,
+                                              data_buffer, len(data_buffer))
+        if lib.ZSTD_isError(zresult):
+            raise ZstdError('decompression error: %s' %
+                            ffi.string(lib.ZSTD_getErrorName(zresult)))
+        elif output_size and zresult != output_size:
+            raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
+                            (zresult, output_size))
+
+        return ffi.buffer(result_buffer, zresult)[:]
+
+    def decompressobj(self):
+        return ZstdDecompressionObj(self)
+
+    def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
+                  write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
+                  skip_bytes=0):
+        if skip_bytes >= read_size:
+            raise ValueError('skip_bytes must be smaller than read_size')
+
+        if hasattr(reader, 'read'):
+            have_read = True
+        elif hasattr(reader, '__getitem__'):
+            have_read = False
+            buffer_offset = 0
+            size = len(reader)
+        else:
+            raise ValueError('must pass an object with a read() method or '
+                             'conforms to buffer protocol')
+
+        if skip_bytes:
+            if have_read:
+                reader.read(skip_bytes)
+            else:
+                if skip_bytes > size:
+                    raise ValueError('skip_bytes larger than first input chunk')
+
+                buffer_offset = skip_bytes
+
+        dstream = self._get_dstream()
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+
+        dst_buffer = ffi.new('char[]', write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = len(dst_buffer)
+        out_buffer.pos = 0
+
+        while True:
+            assert out_buffer.pos == 0
+
+            if have_read:
+                read_result = reader.read(read_size)
+            else:
+                remaining = size - buffer_offset
+                slice_size = min(remaining, read_size)
+                read_result = reader[buffer_offset:buffer_offset + slice_size]
+                buffer_offset += slice_size
+
+            # No new input. Break out of read loop.
+            if not read_result:
+                break
+
+            # Feed all read data into decompressor and emit output until
+            # exhausted.
+            read_buffer = ffi.from_buffer(read_result)
+            in_buffer.src = read_buffer
+            in_buffer.size = len(read_buffer)
+            in_buffer.pos = 0
+
+            while in_buffer.pos < in_buffer.size:
+                assert out_buffer.pos == 0
+
+                zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
+                if lib.ZSTD_isError(zresult):
+                    raise ZstdError('zstd decompress error: %s' %
+                                    ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+                if out_buffer.pos:
+                    data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
+                    out_buffer.pos = 0
+                    yield data
+
+                if zresult == 0:
+                    return
+
+            # Repeat loop to collect more input data.
+            continue
+
+        # If we get here, input is exhausted.
+
+    def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
+        if not hasattr(writer, 'write'):
+            raise ValueError('must pass an object with a write() method')
+
+        return ZstdDecompressionWriter(self, writer, write_size)
+
+    def copy_stream(self, ifh, ofh,
+                    read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
+                    write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
+        if not hasattr(ifh, 'read'):
+            raise ValueError('first argument must have a read() method')
+        if not hasattr(ofh, 'write'):
+            raise ValueError('second argument must have a write() method')
+
+        dstream = self._get_dstream()
+
+        in_buffer = ffi.new('ZSTD_inBuffer *')
+        out_buffer = ffi.new('ZSTD_outBuffer *')
+
+        dst_buffer = ffi.new('char[]', write_size)
+        out_buffer.dst = dst_buffer
+        out_buffer.size = write_size
+        out_buffer.pos = 0
+
+        total_read, total_write = 0, 0
+
+        # Read all available input.
+        while True:
+            data = ifh.read(read_size)
+            if not data:
+                break
+
+            data_buffer = ffi.from_buffer(data)
+            total_read += len(data_buffer)
+            in_buffer.src = data_buffer
+            in_buffer.size = len(data_buffer)
+            in_buffer.pos = 0
+
+            # Flush all read data to output.
+            while in_buffer.pos < in_buffer.size:
+                zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
+                if lib.ZSTD_isError(zresult):
+                    raise ZstdError('zstd decompressor error: %s' %
+                                    ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+                if out_buffer.pos:
+                    ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
+                    total_write += out_buffer.pos
+                    out_buffer.pos = 0
+
+            # Continue loop to keep reading.
+
+        return total_read, total_write
+
+    def decompress_content_dict_chain(self, frames):
+        if not isinstance(frames, list):
+            raise TypeError('argument must be a list')
+
+        if not frames:
+            raise ValueError('empty input chain')
+
+        # First chunk should not be using a dictionary. We handle it specially.
+        chunk = frames[0]
+        if not isinstance(chunk, bytes_type):
+            raise ValueError('chunk 0 must be bytes')
+
+        # All chunks should be zstd frames and should have content size set.
+        chunk_buffer = ffi.from_buffer(chunk)
+        params = ffi.new('ZSTD_frameParams *')
+        zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
+        if lib.ZSTD_isError(zresult):
+            raise ValueError('chunk 0 is not a valid zstd frame')
+        elif zresult:
+            raise ValueError('chunk 0 is too small to contain a zstd frame')
+
+        if not params.frameContentSize:
+            raise ValueError('chunk 0 missing content size in frame')
+
+        dctx = lib.ZSTD_createDCtx()
+        if dctx == ffi.NULL:
+            raise MemoryError()
+
+        dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
+
+        last_buffer = ffi.new('char[]', params.frameContentSize)
+
+        zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
+                                          chunk_buffer, len(chunk_buffer))
+        if lib.ZSTD_isError(zresult):
+            raise ZstdError('could not decompress chunk 0: %s' %
+                            ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+        # Special case of chain length of 1
+        if len(frames) == 1:
+            return ffi.buffer(last_buffer, len(last_buffer))[:]
+
+        i = 1
+        while i < len(frames):
+            chunk = frames[i]
+            if not isinstance(chunk, bytes_type):
+                raise ValueError('chunk %d must be bytes' % i)
+
+            chunk_buffer = ffi.from_buffer(chunk)
+            zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
+            if lib.ZSTD_isError(zresult):
+                raise ValueError('chunk %d is not a valid zstd frame' % i)
+            elif zresult:
+                raise ValueError('chunk %d is too small to contain a zstd frame' % i)
+
+            if not params.frameContentSize:
+                raise ValueError('chunk %d missing content size in frame' % i)
+
+            dest_buffer = ffi.new('char[]', params.frameContentSize)
+
+            zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
+                                                    chunk_buffer, len(chunk_buffer),
+                                                    last_buffer, len(last_buffer))
+            if lib.ZSTD_isError(zresult):
+                raise ZstdError('could not decompress chunk %d' % i)
+
+            last_buffer = dest_buffer
+            i += 1
+
+        return ffi.buffer(last_buffer, len(last_buffer))[:]
+
+    def _get_dstream(self):
+        dstream = lib.ZSTD_createDStream()
+        if dstream == ffi.NULL:
+            raise MemoryError()
+
+        dstream = ffi.gc(dstream, lib.ZSTD_freeDStream)
+
+        if self._dict_data:
+            zresult = lib.ZSTD_initDStream_usingDict(dstream,
+                                                     self._dict_data.as_bytes(),
+                                                     len(self._dict_data))
+        else:
+            zresult = lib.ZSTD_initDStream(dstream)
+
+        if lib.ZSTD_isError(zresult):
+            raise ZstdError('could not initialize DStream: %s' %
+                            ffi.string(lib.ZSTD_getErrorName(zresult)))
+
+        return dstream
diff -r f3807a135e43 -r 455677a7667f hgext/mq.py
--- a/hgext/mq.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/hgext/mq.py	Mon Feb 13 09:44:16 2017 -0800
@@ -14,7 +14,7 @@
 Known patches are represented as patch files in the .hg/patches
 directory. Applied patches are both patch files and changesets.
 
-Common tasks (use :hg:`help command` for more details)::
+Common tasks (use :hg:`help COMMAND` for more details)::
 
   create new patch                          qnew
   import existing patch                     qimport
diff -r f3807a135e43 -r 455677a7667f hgext/pager.py
--- a/hgext/pager.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/hgext/pager.py	Mon Feb 13 09:44:16 2017 -0800
@@ -72,6 +72,7 @@
     commands,
     dispatch,
     encoding,
+    error,
     extensions,
     util,
     )
@@ -87,14 +88,10 @@
                              close_fds=util.closefds, stdin=subprocess.PIPE,
                              stdout=util.stdout, stderr=util.stderr)
 
-    # back up original file objects and descriptors
-    olduifout = ui.fout
-    oldstdout = util.stdout
+    # back up original file descriptors
     stdoutfd = os.dup(util.stdout.fileno())
     stderrfd = os.dup(util.stderr.fileno())
 
-    # create new line-buffered stdout so that output can show up immediately
-    ui.fout = util.stdout = newstdout = os.fdopen(util.stdout.fileno(), 'wb', 1)
     os.dup2(pager.stdin.fileno(), util.stdout.fileno())
     if ui._isatty(util.stderr):
         os.dup2(pager.stdin.fileno(), util.stderr.fileno())
@@ -103,17 +100,15 @@
     def killpager():
         if util.safehasattr(signal, "SIGINT"):
             signal.signal(signal.SIGINT, signal.SIG_IGN)
-        pager.stdin.close()
-        ui.fout = olduifout
-        util.stdout = oldstdout
-        # close new stdout while it's associated with pager; otherwise stdout
-        # fd would be closed when newstdout is deleted
-        newstdout.close()
-        # restore original fds: stdout is open again
+        # restore original fds, closing pager.stdin copies in the process
         os.dup2(stdoutfd, util.stdout.fileno())
         os.dup2(stderrfd, util.stderr.fileno())
+        pager.stdin.close()
         pager.wait()
 
+def catchterm(*args):
+    raise error.SignalInterrupt
+
 def uisetup(ui):
     class pagerui(ui.__class__):
         def _runpager(self, pagercmd):
@@ -154,7 +149,7 @@
             ui.setconfig('ui', 'formatted', ui.formatted(), 'pager')
             ui.setconfig('ui', 'interactive', False, 'pager')
             if util.safehasattr(signal, "SIGPIPE"):
-                signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+                signal.signal(signal.SIGPIPE, catchterm)
             ui._runpager(p)
         return orig(ui, options, cmd, cmdfunc)
 
diff -r f3807a135e43 -r 455677a7667f hgext/rebase.py
--- a/hgext/rebase.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/hgext/rebase.py	Mon Feb 13 09:44:16 2017 -0800
@@ -1367,7 +1367,7 @@
     """store the currently rebased set on the repo object
 
     This is used by another function to prevent rebased revision to because
-    hidden (see issue4505)"""
+    hidden (see issue4504)"""
     repo = repo.unfiltered()
     revs = set(revs)
     repo._rebaseset = revs
@@ -1383,7 +1383,7 @@
         del repo._rebaseset
 
 def _rebasedvisible(orig, repo):
-    """ensure rebased revs stay visible (see issue4505)"""
+    """ensure rebased revs stay visible (see issue4504)"""
     blockers = orig(repo)
     blockers.update(getattr(repo, '_rebaseset', ()))
     return blockers
diff -r f3807a135e43 -r 455677a7667f hgext/zeroconf/__init__.py
--- a/hgext/zeroconf/__init__.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/hgext/zeroconf/__init__.py	Mon Feb 13 09:44:16 2017 -0800
@@ -64,7 +64,9 @@
     # Generic method, sometimes gives useless results
     try:
         dumbip = socket.gethostbyaddr(socket.gethostname())[2][0]
-        if not dumbip.startswith('127.') and ':' not in dumbip:
+        if ':' in dumbip:
+            dumbip = '127.0.0.1'
+        if not dumbip.startswith('127.'):
             return dumbip
     except (socket.gaierror, socket.herror):
         dumbip = '127.0.0.1'
diff -r f3807a135e43 -r 455677a7667f mercurial/bundle2.py
--- a/mercurial/bundle2.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/bundle2.py	Mon Feb 13 09:44:16 2017 -0800
@@ -320,9 +320,6 @@
     It iterates over each part then searches for and uses the proper handling
     code to process the part. Parts are processed in order.
 
-    This is very early version of this function that will be strongly reworked
-    before final usage.
-
     Unknown Mandatory part will abort the process.
 
     It is temporarily possible to provide a prebuilt bundleoperation to the
@@ -865,6 +862,11 @@
         self._generated = None
         self.mandatory = mandatory
 
+    def __repr__(self):
+        cls = "%s.%s" % (self.__class__.__module__, self.__class__.__name__)
+        return ('<%s object at %x; id: %s; type: %s; mandatory: %s>'
+                % (cls, id(self), self.id, self.type, self.mandatory))
+
     def copy(self):
         """return a copy of the part
 
diff -r f3807a135e43 -r 455677a7667f mercurial/cmdutil.py
--- a/mercurial/cmdutil.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/cmdutil.py	Mon Feb 13 09:44:16 2017 -0800
@@ -26,14 +26,12 @@
     changelog,
     copies,
     crecord as crecordmod,
-    dirstateguard as dirstateguardmod,
     encoding,
     error,
     formatter,
     graphmod,
     lock as lockmod,
     match as matchmod,
-    mergeutil,
     obsolete,
     patch,
     pathutil,
@@ -3366,11 +3364,6 @@
 
     return cmd
 
-def checkunresolved(ms):
-    ms._repo.ui.deprecwarn('checkunresolved moved from cmdutil to mergeutil',
-                           '4.1')
-    return mergeutil.checkunresolved(ms)
-
 # a list of (ui, repo, otherpeer, opts, missing) functions called by
 # commands.outgoing.  "missing" is "missing" of the result of
 # "findcommonoutgoing()"
@@ -3477,10 +3470,3 @@
     if after[1]:
         hint = after[0]
     raise error.Abort(_('no %s in progress') % task, hint=hint)
-
-class dirstateguard(dirstateguardmod.dirstateguard):
-    def __init__(self, repo, name):
-        dirstateguardmod.dirstateguard.__init__(self, repo, name)
-        repo.ui.deprecwarn(
-            'dirstateguard has moved from cmdutil to dirstateguard',
-            '4.1')
diff -r f3807a135e43 -r 455677a7667f mercurial/commands.py
--- a/mercurial/commands.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/commands.py	Mon Feb 13 09:44:16 2017 -0800
@@ -59,6 +59,7 @@
     revset,
     scmutil,
     server,
+    smartset,
     sshserver,
     sslutil,
     streamclone,
@@ -1902,7 +1903,7 @@
     fm.write('pythonver', _("checking Python version (%s)\n"),
              ("%d.%d.%d" % sys.version_info[:3]))
     fm.write('pythonlib', _("checking Python lib (%s)...\n"),
-             os.path.dirname(os.__file__))
+             os.path.dirname(pycompat.fsencode(os.__file__)))
 
     security = set(sslutil.supportedprotocols)
     if sslutil.hassni:
@@ -2804,8 +2805,8 @@
         arevs = revset.makematcher(treebystage['analyzed'])(repo)
         brevs = revset.makematcher(treebystage['optimized'])(repo)
         if ui.verbose:
-            ui.note(("* analyzed set:\n"), revset.prettyformatset(arevs), "\n")
-            ui.note(("* optimized set:\n"), revset.prettyformatset(brevs), "\n")
+            ui.note(("* analyzed set:\n"), smartset.prettyformat(arevs), "\n")
+            ui.note(("* optimized set:\n"), smartset.prettyformat(brevs), "\n")
         arevs = list(arevs)
         brevs = list(brevs)
         if arevs == brevs:
@@ -2828,7 +2829,7 @@
     func = revset.makematcher(tree)
     revs = func(repo)
     if ui.verbose:
-        ui.note(("* set:\n"), revset.prettyformatset(revs), "\n")
+        ui.note(("* set:\n"), smartset.prettyformat(revs), "\n")
     for c in revs:
         ui.write("%s\n" % c)
 
@@ -3916,6 +3917,7 @@
     if ui.verbose:
         keep.append('verbose')
 
+    fullname = name
     section = None
     subtopic = None
     if name and '.' in name:
@@ -3938,7 +3940,7 @@
     # to look for, or we could have simply failed to found "foo.bar"
     # because bar isn't a section of foo
     if section and not (formatted and name):
-        raise error.Abort(_("help section not found"))
+        raise error.Abort(_("help section not found: %s") % fullname)
 
     if 'verbose' in pruned:
         keep.append('omitted')
@@ -4127,8 +4129,9 @@
     Import a list of patches and commit them individually (unless
     --no-commit is specified).
 
-    To read a patch from standard input, use "-" as the patch name. If
-    a URL is specified, the patch will be downloaded from there.
+    To read a patch from standard input (stdin), use "-" as the patch
+    name. If a URL is specified, the patch will be downloaded from
+    there.
 
     Import first applies changes to the working directory (unless
     --bypass is specified), import will abort if there are outstanding
@@ -4198,6 +4201,10 @@
 
           hg import incoming-patches.mbox
 
+      - import patches from stdin::
+
+          hg import -
+
       - attempt to exactly restore an exported changeset (not always
         possible)::
 
diff -r f3807a135e43 -r 455677a7667f mercurial/commandserver.py
--- a/mercurial/commandserver.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/commandserver.py	Mon Feb 13 09:44:16 2017 -0800
@@ -447,6 +447,7 @@
         self._sock = None
         self._oldsigchldhandler = None
         self._workerpids = set()  # updated by signal handler; do not iterate
+        self._socketunlinked = None
 
     def init(self):
         self._sock = socket.socket(socket.AF_UNIX)
@@ -455,11 +456,17 @@
         o = signal.signal(signal.SIGCHLD, self._sigchldhandler)
         self._oldsigchldhandler = o
         self._servicehandler.printbanner(self.address)
+        self._socketunlinked = False
+
+    def _unlinksocket(self):
+        if not self._socketunlinked:
+            self._servicehandler.unlinksocket(self.address)
+            self._socketunlinked = True
 
     def _cleanup(self):
         signal.signal(signal.SIGCHLD, self._oldsigchldhandler)
         self._sock.close()
-        self._servicehandler.unlinksocket(self.address)
+        self._unlinksocket()
         # don't kill child processes as they have active clients, just wait
         self._reapworkers(0)
 
@@ -470,11 +477,23 @@
             self._cleanup()
 
     def _mainloop(self):
+        exiting = False
         h = self._servicehandler
-        while not h.shouldexit():
+        while True:
+            if not exiting and h.shouldexit():
+                # clients can no longer connect() to the domain socket, so
+                # we stop queuing new requests.
+                # for requests that are queued (connect()-ed, but haven't been
+                # accept()-ed), handle them before exit. otherwise, clients
+                # waiting for recv() will receive ECONNRESET.
+                self._unlinksocket()
+                exiting = True
             try:
                 ready = select.select([self._sock], [], [], h.pollinterval)[0]
                 if not ready:
+                    # only exit if we completed all queued requests
+                    if exiting:
+                        break
                     continue
                 conn, _addr = self._sock.accept()
             except (select.error, socket.error) as inst:
diff -r f3807a135e43 -r 455677a7667f mercurial/destutil.py
--- a/mercurial/destutil.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/destutil.py	Mon Feb 13 09:44:16 2017 -0800
@@ -37,10 +37,6 @@
                     hint = _("commit and merge, or update --clean to"
                              " discard changes")
                     raise error.UpdateAbort(msg, hint=hint)
-                elif not check:  # destination is not a descendant.
-                    msg = _("not a linear update")
-                    hint = _("merge or update --check to force update")
-                    raise error.UpdateAbort(msg, hint=hint)
 
 def _destupdateobs(repo, clean, check):
     """decide of an update destination from obsolescence markers"""
diff -r f3807a135e43 -r 455677a7667f mercurial/exchange.py
--- a/mercurial/exchange.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/exchange.py	Mon Feb 13 09:44:16 2017 -0800
@@ -1724,9 +1724,15 @@
     if url.startswith('remote:http:') or url.startswith('remote:https:'):
         captureoutput = True
     try:
+        # note: outside bundle1, 'heads' is expected to be empty and this
+        # 'check_heads' call wil be a no-op
         check_heads(repo, heads, 'uploading changes')
         # push can proceed
-        if util.safehasattr(cg, 'params'):
+        if not util.safehasattr(cg, 'params'):
+            # legacy case: bundle1 (changegroup 01)
+            lockandtr[1] = repo.lock()
+            r = cg.apply(repo, source, url)
+        else:
             r = None
             try:
                 def gettransaction():
@@ -1765,9 +1771,6 @@
                                                   mandatory=False)
                         parts.append(part)
                 raise
-        else:
-            lockandtr[1] = repo.lock()
-            r = cg.apply(repo, source, url)
     finally:
         lockmod.release(lockandtr[2], lockandtr[1], lockandtr[0])
         if recordout is not None:
diff -r f3807a135e43 -r 455677a7667f mercurial/filemerge.py
--- a/mercurial/filemerge.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/filemerge.py	Mon Feb 13 09:44:16 2017 -0800
@@ -489,6 +489,9 @@
     args = util.interpolate(r'\$', replace, args,
                             lambda s: util.shellquote(util.localpath(s)))
     cmd = toolpath + ' ' + args
+    if _toolbool(ui, tool, "gui"):
+        repo.ui.status(_('running merge tool %s for file %s\n') %
+                       (tool, fcd.path()))
     repo.ui.debug('launching merge tool: %s\n' % cmd)
     r = ui.system(cmd, cwd=repo.root, environ=env)
     repo.ui.debug('merge tool returned: %s\n' % r)
diff -r f3807a135e43 -r 455677a7667f mercurial/localrepo.py
--- a/mercurial/localrepo.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/localrepo.py	Mon Feb 13 09:44:16 2017 -0800
@@ -1852,6 +1852,11 @@
                                   listsubrepos)
 
     def heads(self, start=None):
+        if start is None:
+            cl = self.changelog
+            headrevs = reversed(cl.headrevs())
+            return [cl.node(rev) for rev in headrevs]
+
         heads = self.changelog.heads(start)
         # sort the output in rev descending order
         return sorted(heads, key=self.changelog.rev, reverse=True)
diff -r f3807a135e43 -r 455677a7667f mercurial/merge.py
--- a/mercurial/merge.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/merge.py	Mon Feb 13 09:44:16 2017 -0800
@@ -1448,7 +1448,7 @@
     """
     Perform a merge between the working directory and the given node
 
-    node = the node to update to, or None if unspecified
+    node = the node to update to
     branchmerge = whether to merge between branches
     force = whether to force branch merging or file overwriting
     matcher = a matcher to filter file lists (dirstate not updated)
@@ -1491,7 +1491,9 @@
     Return the same tuple as applyupdates().
     """
 
-    onode = node
+    # This functon used to find the default destination if node was None, but
+    # that's now in destutil.py.
+    assert node is not None
     # If we're doing a partial update, we need to skip updating
     # the dirstate, so make a note of any partial-ness to the
     # update here.
@@ -1550,37 +1552,30 @@
 
             if pas not in ([p1], [p2]):  # nonlinear
                 dirty = wc.dirty(missing=True)
-                if dirty or onode is None:
+                if dirty:
                     # Branching is a bit strange to ensure we do the minimal
-                    # amount of call to obsolete.background.
+                    # amount of call to obsolete.foreground.
                     foreground = obsolete.foreground(repo, [p1.node()])
                     # note: the <node> variable contains a random identifier
                     if repo[node].node() in foreground:
-                        pas = [p1]  # allow updating to successors
-                    elif dirty:
+                        pass # allow updating to successors
+                    else:
                         msg = _("uncommitted changes")
-                        if onode is None:
-                            hint = _("commit and merge, or update --clean to"
-                                     " discard changes")
-                        else:
-                            hint = _("commit or update --clean to discard"
-                                     " changes")
-                        raise error.Abort(msg, hint=hint)
-                    else:  # node is none
-                        msg = _("not a linear update")
-                        hint = _("merge or update --check to force update")
+                        hint = _("commit or update --clean to discard changes")
                         raise error.Abort(msg, hint=hint)
                 else:
                     # Allow jumping branches if clean and specific rev given
-                    pas = [p1]
+                    pass
+
+        if overwrite:
+            pas = [wc]
+        elif not branchmerge:
+            pas = [p1]
 
         # deprecated config: merge.followcopies
         followcopies = repo.ui.configbool('merge', 'followcopies', True)
         if overwrite:
-            pas = [wc]
             followcopies = False
-        elif pas == [p2]: # backwards
-            pas = [p1]
         elif not pas[0]:
             followcopies = False
         if not branchmerge and not wc.dirty(missing=True):
diff -r f3807a135e43 -r 455677a7667f mercurial/revset.py
--- a/mercurial/revset.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/revset.py	Mon Feb 13 09:44:16 2017 -0800
@@ -26,9 +26,15 @@
     pycompat,
     registrar,
     repoview,
+    smartset,
     util,
 )
 
+baseset = smartset.baseset
+generatorset = smartset.generatorset
+spanset = smartset.spanset
+fullreposet = smartset.fullreposet
+
 def _revancestors(repo, revs, followfirst):
     """Like revlog.ancestors(), but supports followfirst."""
     if followfirst:
@@ -2940,967 +2946,6 @@
             funcs.add(tree[1][1])
         return funcs
 
-def _formatsetrepr(r):
-    """Format an optional printable representation of a set
-
-    ========  =================================
-    type(r)   example
-    ========  =================================
-    tuple     ('<not %r>', other)
-    str       '<branch closed>'
-    callable  lambda: '<branch %r>' % sorted(b)
-    object    other
-    ========  =================================
-    """
-    if r is None:
-        return ''
-    elif isinstance(r, tuple):
-        return r[0] % r[1:]
-    elif isinstance(r, str):
-        return r
-    elif callable(r):
-        return r()
-    else:
-        return repr(r)
-
-class abstractsmartset(object):
-
-    def __nonzero__(self):
-        """True if the smartset is not empty"""
-        raise NotImplementedError()
-
-    def __contains__(self, rev):
-        """provide fast membership testing"""
-        raise NotImplementedError()
-
-    def __iter__(self):
-        """iterate the set in the order it is supposed to be iterated"""
-        raise NotImplementedError()
-
-    # Attributes containing a function to perform a fast iteration in a given
-    # direction. A smartset can have none, one, or both defined.
-    #
-    # Default value is None instead of a function returning None to avoid
-    # initializing an iterator just for testing if a fast method exists.
-    fastasc = None
-    fastdesc = None
-
-    def isascending(self):
-        """True if the set will iterate in ascending order"""
-        raise NotImplementedError()
-
-    def isdescending(self):
-        """True if the set will iterate in descending order"""
-        raise NotImplementedError()
-
-    def istopo(self):
-        """True if the set will iterate in topographical order"""
-        raise NotImplementedError()
-
-    def min(self):
-        """return the minimum element in the set"""
-        if self.fastasc is None:
-            v = min(self)
-        else:
-            for v in self.fastasc():
-                break
-            else:
-                raise ValueError('arg is an empty sequence')
-        self.min = lambda: v
-        return v
-
-    def max(self):
-        """return the maximum element in the set"""
-        if self.fastdesc is None:
-            return max(self)
-        else:
-            for v in self.fastdesc():
-                break
-            else:
-                raise ValueError('arg is an empty sequence')
-        self.max = lambda: v
-        return v
-
-    def first(self):
-        """return the first element in the set (user iteration perspective)
-
-        Return None if the set is empty"""
-        raise NotImplementedError()
-
-    def last(self):
-        """return the last element in the set (user iteration perspective)
-
-        Return None if the set is empty"""
-        raise NotImplementedError()
-
-    def __len__(self):
-        """return the length of the smartsets
-
-        This can be expensive on smartset that could be lazy otherwise."""
-        raise NotImplementedError()
-
-    def reverse(self):
-        """reverse the expected iteration order"""
-        raise NotImplementedError()
-
-    def sort(self, reverse=True):
-        """get the set to iterate in an ascending or descending order"""
-        raise NotImplementedError()
-
-    def __and__(self, other):
-        """Returns a new object with the intersection of the two collections.
-
-        This is part of the mandatory API for smartset."""
-        if isinstance(other, fullreposet):
-            return self
-        return self.filter(other.__contains__, condrepr=other, cache=False)
-
-    def __add__(self, other):
-        """Returns a new object with the union of the two collections.
-
-        This is part of the mandatory API for smartset."""
-        return addset(self, other)
-
-    def __sub__(self, other):
-        """Returns a new object with the substraction of the two collections.
-
-        This is part of the mandatory API for smartset."""
-        c = other.__contains__
-        return self.filter(lambda r: not c(r), condrepr=('<not %r>', other),
-                           cache=False)
-
-    def filter(self, condition, condrepr=None, cache=True):
-        """Returns this smartset filtered by condition as a new smartset.
-
-        `condition` is a callable which takes a revision number and returns a
-        boolean. Optional `condrepr` provides a printable representation of
-        the given `condition`.
-
-        This is part of the mandatory API for smartset."""
-        # builtin cannot be cached. but do not needs to
-        if cache and util.safehasattr(condition, 'func_code'):
-            condition = util.cachefunc(condition)
-        return filteredset(self, condition, condrepr)
-
-class baseset(abstractsmartset):
-    """Basic data structure that represents a revset and contains the basic
-    operation that it should be able to perform.
-
-    Every method in this class should be implemented by any smartset class.
-    """
-    def __init__(self, data=(), datarepr=None, istopo=False):
-        """
-        datarepr: a tuple of (format, obj, ...), a function or an object that
-                  provides a printable representation of the given data.
-        """
-        self._ascending = None
-        self._istopo = istopo
-        if not isinstance(data, list):
-            if isinstance(data, set):
-                self._set = data
-                # set has no order we pick one for stability purpose
-                self._ascending = True
-            data = list(data)
-        self._list = data
-        self._datarepr = datarepr
-
-    @util.propertycache
-    def _set(self):
-        return set(self._list)
-
-    @util.propertycache
-    def _asclist(self):
-        asclist = self._list[:]
-        asclist.sort()
-        return asclist
-
-    def __iter__(self):
-        if self._ascending is None:
-            return iter(self._list)
-        elif self._ascending:
-            return iter(self._asclist)
-        else:
-            return reversed(self._asclist)
-
-    def fastasc(self):
-        return iter(self._asclist)
-
-    def fastdesc(self):
-        return reversed(self._asclist)
-
-    @util.propertycache
-    def __contains__(self):
-        return self._set.__contains__
-
-    def __nonzero__(self):
-        return bool(self._list)
-
-    def sort(self, reverse=False):
-        self._ascending = not bool(reverse)
-        self._istopo = False
-
-    def reverse(self):
-        if self._ascending is None:
-            self._list.reverse()
-        else:
-            self._ascending = not self._ascending
-        self._istopo = False
-
-    def __len__(self):
-        return len(self._list)
-
-    def isascending(self):
-        """Returns True if the collection is ascending order, False if not.
-
-        This is part of the mandatory API for smartset."""
-        if len(self) <= 1:
-            return True
-        return self._ascending is not None and self._ascending
-
-    def isdescending(self):
-        """Returns True if the collection is descending order, False if not.
-
-        This is part of the mandatory API for smartset."""
-        if len(self) <= 1:
-            return True
-        return self._ascending is not None and not self._ascending
-
-    def istopo(self):
-        """Is the collection is in topographical order or not.
-
-        This is part of the mandatory API for smartset."""
-        if len(self) <= 1:
-            return True
-        return self._istopo
-
-    def first(self):
-        if self:
-            if self._ascending is None:
-                return self._list[0]
-            elif self._ascending:
-                return self._asclist[0]
-            else:
-                return self._asclist[-1]
-        return None
-
-    def last(self):
-        if self:
-            if self._ascending is None:
-                return self._list[-1]
-            elif self._ascending:
-                return self._asclist[-1]
-            else:
-                return self._asclist[0]
-        return None
-
-    def __repr__(self):
-        d = {None: '', False: '-', True: '+'}[self._ascending]
-        s = _formatsetrepr(self._datarepr)
-        if not s:
-            l = self._list
-            # if _list has been built from a set, it might have a different
-            # order from one python implementation to another.
-            # We fallback to the sorted version for a stable output.
-            if self._ascending is not None:
-                l = self._asclist
-            s = repr(l)
-        return '<%s%s %s>' % (type(self).__name__, d, s)
-
-class filteredset(abstractsmartset):
-    """Duck type for baseset class which iterates lazily over the revisions in
-    the subset and contains a function which tests for membership in the
-    revset
-    """
-    def __init__(self, subset, condition=lambda x: True, condrepr=None):
-        """
-        condition: a function that decide whether a revision in the subset
-                   belongs to the revset or not.
-        condrepr: a tuple of (format, obj, ...), a function or an object that
-                  provides a printable representation of the given condition.
-        """
-        self._subset = subset
-        self._condition = condition
-        self._condrepr = condrepr
-
-    def __contains__(self, x):
-        return x in self._subset and self._condition(x)
-
-    def __iter__(self):
-        return self._iterfilter(self._subset)
-
-    def _iterfilter(self, it):
-        cond = self._condition
-        for x in it:
-            if cond(x):
-                yield x
-
-    @property
-    def fastasc(self):
-        it = self._subset.fastasc
-        if it is None:
-            return None
-        return lambda: self._iterfilter(it())
-
-    @property
-    def fastdesc(self):
-        it = self._subset.fastdesc
-        if it is None:
-            return None
-        return lambda: self._iterfilter(it())
-
-    def __nonzero__(self):
-        fast = None
-        candidates = [self.fastasc if self.isascending() else None,
-                      self.fastdesc if self.isdescending() else None,
-                      self.fastasc,
-                      self.fastdesc]
-        for candidate in candidates:
-            if candidate is not None:
-                fast = candidate
-                break
-
-        if fast is not None:
-            it = fast()
-        else:
-            it = self
-
-        for r in it:
-            return True
-        return False
-
-    def __len__(self):
-        # Basic implementation to be changed in future patches.
-        # until this gets improved, we use generator expression
-        # here, since list comprehensions are free to call __len__ again
-        # causing infinite recursion
-        l = baseset(r for r in self)
-        return len(l)
-
-    def sort(self, reverse=False):
-        self._subset.sort(reverse=reverse)
-
-    def reverse(self):
-        self._subset.reverse()
-
-    def isascending(self):
-        return self._subset.isascending()
-
-    def isdescending(self):
-        return self._subset.isdescending()
-
-    def istopo(self):
-        return self._subset.istopo()
-
-    def first(self):
-        for x in self:
-            return x
-        return None
-
-    def last(self):
-        it = None
-        if self.isascending():
-            it = self.fastdesc
-        elif self.isdescending():
-            it = self.fastasc
-        if it is not None:
-            for x in it():
-                return x
-            return None #empty case
-        else:
-            x = None
-            for x in self:
-                pass
-            return x
-
-    def __repr__(self):
-        xs = [repr(self._subset)]
-        s = _formatsetrepr(self._condrepr)
-        if s:
-            xs.append(s)
-        return '<%s %s>' % (type(self).__name__, ', '.join(xs))
-
-def _iterordered(ascending, iter1, iter2):
-    """produce an ordered iteration from two iterators with the same order
-
-    The ascending is used to indicated the iteration direction.
-    """
-    choice = max
-    if ascending:
-        choice = min
-
-    val1 = None
-    val2 = None
-    try:
-        # Consume both iterators in an ordered way until one is empty
-        while True:
-            if val1 is None:
-                val1 = next(iter1)
-            if val2 is None:
-                val2 = next(iter2)
-            n = choice(val1, val2)
-            yield n
-            if val1 == n:
-                val1 = None
-            if val2 == n:
-                val2 = None
-    except StopIteration:
-        # Flush any remaining values and consume the other one
-        it = iter2
-        if val1 is not None:
-            yield val1
-            it = iter1
-        elif val2 is not None:
-            # might have been equality and both are empty
-            yield val2
-        for val in it:
-            yield val
-
-class addset(abstractsmartset):
-    """Represent the addition of two sets
-
-    Wrapper structure for lazily adding two structures without losing much
-    performance on the __contains__ method
-
-    If the ascending attribute is set, that means the two structures are
-    ordered in either an ascending or descending way. Therefore, we can add
-    them maintaining the order by iterating over both at the same time
-
-    >>> xs = baseset([0, 3, 2])
-    >>> ys = baseset([5, 2, 4])
-
-    >>> rs = addset(xs, ys)
-    >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last()
-    (True, True, False, True, 0, 4)
-    >>> rs = addset(xs, baseset([]))
-    >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last()
-    (True, True, False, 0, 2)
-    >>> rs = addset(baseset([]), baseset([]))
-    >>> bool(rs), 0 in rs, rs.first(), rs.last()
-    (False, False, None, None)
-
-    iterate unsorted:
-    >>> rs = addset(xs, ys)
-    >>> # (use generator because pypy could call len())
-    >>> list(x for x in rs)  # without _genlist
-    [0, 3, 2, 5, 4]
-    >>> assert not rs._genlist
-    >>> len(rs)
-    5
-    >>> [x for x in rs]  # with _genlist
-    [0, 3, 2, 5, 4]
-    >>> assert rs._genlist
-
-    iterate ascending:
-    >>> rs = addset(xs, ys, ascending=True)
-    >>> # (use generator because pypy could call len())
-    >>> list(x for x in rs), list(x for x in rs.fastasc())  # without _asclist
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
-    >>> assert not rs._asclist
-    >>> len(rs)
-    5
-    >>> [x for x in rs], [x for x in rs.fastasc()]
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
-    >>> assert rs._asclist
-
-    iterate descending:
-    >>> rs = addset(xs, ys, ascending=False)
-    >>> # (use generator because pypy could call len())
-    >>> list(x for x in rs), list(x for x in rs.fastdesc())  # without _asclist
-    ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
-    >>> assert not rs._asclist
-    >>> len(rs)
-    5
-    >>> [x for x in rs], [x for x in rs.fastdesc()]
-    ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
-    >>> assert rs._asclist
-
-    iterate ascending without fastasc:
-    >>> rs = addset(xs, generatorset(ys), ascending=True)
-    >>> assert rs.fastasc is None
-    >>> [x for x in rs]
-    [0, 2, 3, 4, 5]
-
-    iterate descending without fastdesc:
-    >>> rs = addset(generatorset(xs), ys, ascending=False)
-    >>> assert rs.fastdesc is None
-    >>> [x for x in rs]
-    [5, 4, 3, 2, 0]
-    """
-    def __init__(self, revs1, revs2, ascending=None):
-        self._r1 = revs1
-        self._r2 = revs2
-        self._iter = None
-        self._ascending = ascending
-        self._genlist = None
-        self._asclist = None
-
-    def __len__(self):
-        return len(self._list)
-
-    def __nonzero__(self):
-        return bool(self._r1) or bool(self._r2)
-
-    @util.propertycache
-    def _list(self):
-        if not self._genlist:
-            self._genlist = baseset(iter(self))
-        return self._genlist
-
-    def __iter__(self):
-        """Iterate over both collections without repeating elements
-
-        If the ascending attribute is not set, iterate over the first one and
-        then over the second one checking for membership on the first one so we
-        dont yield any duplicates.
-
-        If the ascending attribute is set, iterate over both collections at the
-        same time, yielding only one value at a time in the given order.
-        """
-        if self._ascending is None:
-            if self._genlist:
-                return iter(self._genlist)
-            def arbitraryordergen():
-                for r in self._r1:
-                    yield r
-                inr1 = self._r1.__contains__
-                for r in self._r2:
-                    if not inr1(r):
-                        yield r
-            return arbitraryordergen()
-        # try to use our own fast iterator if it exists
-        self._trysetasclist()
-        if self._ascending:
-            attr = 'fastasc'
-        else:
-            attr = 'fastdesc'
-        it = getattr(self, attr)
-        if it is not None:
-            return it()
-        # maybe half of the component supports fast
-        # get iterator for _r1
-        iter1 = getattr(self._r1, attr)
-        if iter1 is None:
-            # let's avoid side effect (not sure it matters)
-            iter1 = iter(sorted(self._r1, reverse=not self._ascending))
-        else:
-            iter1 = iter1()
-        # get iterator for _r2
-        iter2 = getattr(self._r2, attr)
-        if iter2 is None:
-            # let's avoid side effect (not sure it matters)
-            iter2 = iter(sorted(self._r2, reverse=not self._ascending))
-        else:
-            iter2 = iter2()
-        return _iterordered(self._ascending, iter1, iter2)
-
-    def _trysetasclist(self):
-        """populate the _asclist attribute if possible and necessary"""
-        if self._genlist is not None and self._asclist is None:
-            self._asclist = sorted(self._genlist)
-
-    @property
-    def fastasc(self):
-        self._trysetasclist()
-        if self._asclist is not None:
-            return self._asclist.__iter__
-        iter1 = self._r1.fastasc
-        iter2 = self._r2.fastasc
-        if None in (iter1, iter2):
-            return None
-        return lambda: _iterordered(True, iter1(), iter2())
-
-    @property
-    def fastdesc(self):
-        self._trysetasclist()
-        if self._asclist is not None:
-            return self._asclist.__reversed__
-        iter1 = self._r1.fastdesc
-        iter2 = self._r2.fastdesc
-        if None in (iter1, iter2):
-            return None
-        return lambda: _iterordered(False, iter1(), iter2())
-
-    def __contains__(self, x):
-        return x in self._r1 or x in self._r2
-
-    def sort(self, reverse=False):
-        """Sort the added set
-
-        For this we use the cached list with all the generated values and if we
-        know they are ascending or descending we can sort them in a smart way.
-        """
-        self._ascending = not reverse
-
-    def isascending(self):
-        return self._ascending is not None and self._ascending
-
-    def isdescending(self):
-        return self._ascending is not None and not self._ascending
-
-    def istopo(self):
-        # not worth the trouble asserting if the two sets combined are still
-        # in topographical order. Use the sort() predicate to explicitly sort
-        # again instead.
-        return False
-
-    def reverse(self):
-        if self._ascending is None:
-            self._list.reverse()
-        else:
-            self._ascending = not self._ascending
-
-    def first(self):
-        for x in self:
-            return x
-        return None
-
-    def last(self):
-        self.reverse()
-        val = self.first()
-        self.reverse()
-        return val
-
-    def __repr__(self):
-        d = {None: '', False: '-', True: '+'}[self._ascending]
-        return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2)
-
-class generatorset(abstractsmartset):
-    """Wrap a generator for lazy iteration
-
-    Wrapper structure for generators that provides lazy membership and can
-    be iterated more than once.
-    When asked for membership it generates values until either it finds the
-    requested one or has gone through all the elements in the generator
-    """
-    def __init__(self, gen, iterasc=None):
-        """
-        gen: a generator producing the values for the generatorset.
-        """
-        self._gen = gen
-        self._asclist = None
-        self._cache = {}
-        self._genlist = []
-        self._finished = False
-        self._ascending = True
-        if iterasc is not None:
-            if iterasc:
-                self.fastasc = self._iterator
-                self.__contains__ = self._asccontains
-            else:
-                self.fastdesc = self._iterator
-                self.__contains__ = self._desccontains
-
-    def __nonzero__(self):
-        # Do not use 'for r in self' because it will enforce the iteration
-        # order (default ascending), possibly unrolling a whole descending
-        # iterator.
-        if self._genlist:
-            return True
-        for r in self._consumegen():
-            return True
-        return False
-
-    def __contains__(self, x):
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-
-        self._cache[x] = False
-        return False
-
-    def _asccontains(self, x):
-        """version of contains optimised for ascending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l > x:
-                break
-
-        self._cache[x] = False
-        return False
-
-    def _desccontains(self, x):
-        """version of contains optimised for descending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l < x:
-                break
-
-        self._cache[x] = False
-        return False
-
-    def __iter__(self):
-        if self._ascending:
-            it = self.fastasc
-        else:
-            it = self.fastdesc
-        if it is not None:
-            return it()
-        # we need to consume the iterator
-        for x in self._consumegen():
-            pass
-        # recall the same code
-        return iter(self)
-
-    def _iterator(self):
-        if self._finished:
-            return iter(self._genlist)
-
-        # We have to use this complex iteration strategy to allow multiple
-        # iterations at the same time. We need to be able to catch revision
-        # removed from _consumegen and added to genlist in another instance.
-        #
-        # Getting rid of it would provide an about 15% speed up on this
-        # iteration.
-        genlist = self._genlist
-        nextrev = self._consumegen().next
-        _len = len # cache global lookup
-        def gen():
-            i = 0
-            while True:
-                if i < _len(genlist):
-                    yield genlist[i]
-                else:
-                    yield nextrev()
-                i += 1
-        return gen()
-
-    def _consumegen(self):
-        cache = self._cache
-        genlist = self._genlist.append
-        for item in self._gen:
-            cache[item] = True
-            genlist(item)
-            yield item
-        if not self._finished:
-            self._finished = True
-            asc = self._genlist[:]
-            asc.sort()
-            self._asclist = asc
-            self.fastasc = asc.__iter__
-            self.fastdesc = asc.__reversed__
-
-    def __len__(self):
-        for x in self._consumegen():
-            pass
-        return len(self._genlist)
-
-    def sort(self, reverse=False):
-        self._ascending = not reverse
-
-    def reverse(self):
-        self._ascending = not self._ascending
-
-    def isascending(self):
-        return self._ascending
-
-    def isdescending(self):
-        return not self._ascending
-
-    def istopo(self):
-        # not worth the trouble asserting if the two sets combined are still
-        # in topographical order. Use the sort() predicate to explicitly sort
-        # again instead.
-        return False
-
-    def first(self):
-        if self._ascending:
-            it = self.fastasc
-        else:
-            it = self.fastdesc
-        if it is None:
-            # we need to consume all and try again
-            for x in self._consumegen():
-                pass
-            return self.first()
-        return next(it(), None)
-
-    def last(self):
-        if self._ascending:
-            it = self.fastdesc
-        else:
-            it = self.fastasc
-        if it is None:
-            # we need to consume all and try again
-            for x in self._consumegen():
-                pass
-            return self.first()
-        return next(it(), None)
-
-    def __repr__(self):
-        d = {False: '-', True: '+'}[self._ascending]
-        return '<%s%s>' % (type(self).__name__, d)
-
-class spanset(abstractsmartset):
-    """Duck type for baseset class which represents a range of revisions and
-    can work lazily and without having all the range in memory
-
-    Note that spanset(x, y) behave almost like xrange(x, y) except for two
-    notable points:
-    - when x < y it will be automatically descending,
-    - revision filtered with this repoview will be skipped.
-
-    """
-    def __init__(self, repo, start=0, end=None):
-        """
-        start: first revision included the set
-               (default to 0)
-        end:   first revision excluded (last+1)
-               (default to len(repo)
-
-        Spanset will be descending if `end` < `start`.
-        """
-        if end is None:
-            end = len(repo)
-        self._ascending = start <= end
-        if not self._ascending:
-            start, end = end + 1, start +1
-        self._start = start
-        self._end = end
-        self._hiddenrevs = repo.changelog.filteredrevs
-
-    def sort(self, reverse=False):
-        self._ascending = not reverse
-
-    def reverse(self):
-        self._ascending = not self._ascending
-
-    def istopo(self):
-        # not worth the trouble asserting if the two sets combined are still
-        # in topographical order. Use the sort() predicate to explicitly sort
-        # again instead.
-        return False
-
-    def _iterfilter(self, iterrange):
-        s = self._hiddenrevs
-        for r in iterrange:
-            if r not in s:
-                yield r
-
-    def __iter__(self):
-        if self._ascending:
-            return self.fastasc()
-        else:
-            return self.fastdesc()
-
-    def fastasc(self):
-        iterrange = xrange(self._start, self._end)
-        if self._hiddenrevs:
-            return self._iterfilter(iterrange)
-        return iter(iterrange)
-
-    def fastdesc(self):
-        iterrange = xrange(self._end - 1, self._start - 1, -1)
-        if self._hiddenrevs:
-            return self._iterfilter(iterrange)
-        return iter(iterrange)
-
-    def __contains__(self, rev):
-        hidden = self._hiddenrevs
-        return ((self._start <= rev < self._end)
-                and not (hidden and rev in hidden))
-
-    def __nonzero__(self):
-        for r in self:
-            return True
-        return False
-
-    def __len__(self):
-        if not self._hiddenrevs:
-            return abs(self._end - self._start)
-        else:
-            count = 0
-            start = self._start
-            end = self._end
-            for rev in self._hiddenrevs:
-                if (end < rev <= start) or (start <= rev < end):
-                    count += 1
-            return abs(self._end - self._start) - count
-
-    def isascending(self):
-        return self._ascending
-
-    def isdescending(self):
-        return not self._ascending
-
-    def first(self):
-        if self._ascending:
-            it = self.fastasc
-        else:
-            it = self.fastdesc
-        for x in it():
-            return x
-        return None
-
-    def last(self):
-        if self._ascending:
-            it = self.fastdesc
-        else:
-            it = self.fastasc
-        for x in it():
-            return x
-        return None
-
-    def __repr__(self):
-        d = {False: '-', True: '+'}[self._ascending]
-        return '<%s%s %d:%d>' % (type(self).__name__, d,
-                                 self._start, self._end - 1)
-
-class fullreposet(spanset):
-    """a set containing all revisions in the repo
-
-    This class exists to host special optimization and magic to handle virtual
-    revisions such as "null".
-    """
-
-    def __init__(self, repo):
-        super(fullreposet, self).__init__(repo)
-
-    def __and__(self, other):
-        """As self contains the whole repo, all of the other set should also be
-        in self. Therefore `self & other = other`.
-
-        This boldly assumes the other contains valid revs only.
-        """
-        # other not a smartset, make is so
-        if not util.safehasattr(other, 'isascending'):
-            # filter out hidden revision
-            # (this boldly assumes all smartset are pure)
-            #
-            # `other` was used with "&", let's assume this is a set like
-            # object.
-            other = baseset(other - self._hiddenrevs)
-
-        other.sort(reverse=self.isdescending())
-        return other
-
-def prettyformatset(revs):
-    lines = []
-    rs = repr(revs)
-    p = 0
-    while p < len(rs):
-        q = rs.find('<', p + 1)
-        if q < 0:
-            q = len(rs)
-        l = rs.count('<', 0, p) - rs.count('>', 0, p)
-        assert l >= 0
-        lines.append((l, rs[p:q].rstrip()))
-        p = q
-    return '\n'.join('  ' * l + s for l, s in lines)
-
 def loadpredicate(ui, extname, registrarobj):
     """Load revset predicates from specified registrarobj
     """
diff -r f3807a135e43 -r 455677a7667f mercurial/smartset.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/smartset.py	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,973 @@
+# smartset.py - data structure for revision set
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from . import (
+    util,
+)
+
+def _formatsetrepr(r):
+    """Format an optional printable representation of a set
+
+    ========  =================================
+    type(r)   example
+    ========  =================================
+    tuple     ('<not %r>', other)
+    str       '<branch closed>'
+    callable  lambda: '<branch %r>' % sorted(b)
+    object    other
+    ========  =================================
+    """
+    if r is None:
+        return ''
+    elif isinstance(r, tuple):
+        return r[0] % r[1:]
+    elif isinstance(r, str):
+        return r
+    elif callable(r):
+        return r()
+    else:
+        return repr(r)
+
+class abstractsmartset(object):
+
+    def __nonzero__(self):
+        """True if the smartset is not empty"""
+        raise NotImplementedError()
+
+    def __contains__(self, rev):
+        """provide fast membership testing"""
+        raise NotImplementedError()
+
+    def __iter__(self):
+        """iterate the set in the order it is supposed to be iterated"""
+        raise NotImplementedError()
+
+    # Attributes containing a function to perform a fast iteration in a given
+    # direction. A smartset can have none, one, or both defined.
+    #
+    # Default value is None instead of a function returning None to avoid
+    # initializing an iterator just for testing if a fast method exists.
+    fastasc = None
+    fastdesc = None
+
+    def isascending(self):
+        """True if the set will iterate in ascending order"""
+        raise NotImplementedError()
+
+    def isdescending(self):
+        """True if the set will iterate in descending order"""
+        raise NotImplementedError()
+
+    def istopo(self):
+        """True if the set will iterate in topographical order"""
+        raise NotImplementedError()
+
+    def min(self):
+        """return the minimum element in the set"""
+        if self.fastasc is None:
+            v = min(self)
+        else:
+            for v in self.fastasc():
+                break
+            else:
+                raise ValueError('arg is an empty sequence')
+        self.min = lambda: v
+        return v
+
+    def max(self):
+        """return the maximum element in the set"""
+        if self.fastdesc is None:
+            return max(self)
+        else:
+            for v in self.fastdesc():
+                break
+            else:
+                raise ValueError('arg is an empty sequence')
+        self.max = lambda: v
+        return v
+
+    def first(self):
+        """return the first element in the set (user iteration perspective)
+
+        Return None if the set is empty"""
+        raise NotImplementedError()
+
+    def last(self):
+        """return the last element in the set (user iteration perspective)
+
+        Return None if the set is empty"""
+        raise NotImplementedError()
+
+    def __len__(self):
+        """return the length of the smartsets
+
+        This can be expensive on smartset that could be lazy otherwise."""
+        raise NotImplementedError()
+
+    def reverse(self):
+        """reverse the expected iteration order"""
+        raise NotImplementedError()
+
+    def sort(self, reverse=True):
+        """get the set to iterate in an ascending or descending order"""
+        raise NotImplementedError()
+
+    def __and__(self, other):
+        """Returns a new object with the intersection of the two collections.
+
+        This is part of the mandatory API for smartset."""
+        if isinstance(other, fullreposet):
+            return self
+        return self.filter(other.__contains__, condrepr=other, cache=False)
+
+    def __add__(self, other):
+        """Returns a new object with the union of the two collections.
+
+        This is part of the mandatory API for smartset."""
+        return addset(self, other)
+
+    def __sub__(self, other):
+        """Returns a new object with the substraction of the two collections.
+
+        This is part of the mandatory API for smartset."""
+        c = other.__contains__
+        return self.filter(lambda r: not c(r), condrepr=('<not %r>', other),
+                           cache=False)
+
+    def filter(self, condition, condrepr=None, cache=True):
+        """Returns this smartset filtered by condition as a new smartset.
+
+        `condition` is a callable which takes a revision number and returns a
+        boolean. Optional `condrepr` provides a printable representation of
+        the given `condition`.
+
+        This is part of the mandatory API for smartset."""
+        # builtin cannot be cached. but do not needs to
+        if cache and util.safehasattr(condition, 'func_code'):
+            condition = util.cachefunc(condition)
+        return filteredset(self, condition, condrepr)
+
+class baseset(abstractsmartset):
+    """Basic data structure that represents a revset and contains the basic
+    operation that it should be able to perform.
+
+    Every method in this class should be implemented by any smartset class.
+    """
+    def __init__(self, data=(), datarepr=None, istopo=False):
+        """
+        datarepr: a tuple of (format, obj, ...), a function or an object that
+                  provides a printable representation of the given data.
+        """
+        self._ascending = None
+        self._istopo = istopo
+        if not isinstance(data, list):
+            if isinstance(data, set):
+                self._set = data
+                # set has no order we pick one for stability purpose
+                self._ascending = True
+            data = list(data)
+        self._list = data
+        self._datarepr = datarepr
+
+    @util.propertycache
+    def _set(self):
+        return set(self._list)
+
+    @util.propertycache
+    def _asclist(self):
+        asclist = self._list[:]
+        asclist.sort()
+        return asclist
+
+    def __iter__(self):
+        if self._ascending is None:
+            return iter(self._list)
+        elif self._ascending:
+            return iter(self._asclist)
+        else:
+            return reversed(self._asclist)
+
+    def fastasc(self):
+        return iter(self._asclist)
+
+    def fastdesc(self):
+        return reversed(self._asclist)
+
+    @util.propertycache
+    def __contains__(self):
+        return self._set.__contains__
+
+    def __nonzero__(self):
+        return bool(self._list)
+
+    def sort(self, reverse=False):
+        self._ascending = not bool(reverse)
+        self._istopo = False
+
+    def reverse(self):
+        if self._ascending is None:
+            self._list.reverse()
+        else:
+            self._ascending = not self._ascending
+        self._istopo = False
+
+    def __len__(self):
+        return len(self._list)
+
+    def isascending(self):
+        """Returns True if the collection is ascending order, False if not.
+
+        This is part of the mandatory API for smartset."""
+        if len(self) <= 1:
+            return True
+        return self._ascending is not None and self._ascending
+
+    def isdescending(self):
+        """Returns True if the collection is descending order, False if not.
+
+        This is part of the mandatory API for smartset."""
+        if len(self) <= 1:
+            return True
+        return self._ascending is not None and not self._ascending
+
+    def istopo(self):
+        """Is the collection is in topographical order or not.
+
+        This is part of the mandatory API for smartset."""
+        if len(self) <= 1:
+            return True
+        return self._istopo
+
+    def first(self):
+        if self:
+            if self._ascending is None:
+                return self._list[0]
+            elif self._ascending:
+                return self._asclist[0]
+            else:
+                return self._asclist[-1]
+        return None
+
+    def last(self):
+        if self:
+            if self._ascending is None:
+                return self._list[-1]
+            elif self._ascending:
+                return self._asclist[-1]
+            else:
+                return self._asclist[0]
+        return None
+
+    def __repr__(self):
+        d = {None: '', False: '-', True: '+'}[self._ascending]
+        s = _formatsetrepr(self._datarepr)
+        if not s:
+            l = self._list
+            # if _list has been built from a set, it might have a different
+            # order from one python implementation to another.
+            # We fallback to the sorted version for a stable output.
+            if self._ascending is not None:
+                l = self._asclist
+            s = repr(l)
+        return '<%s%s %s>' % (type(self).__name__, d, s)
+
+class filteredset(abstractsmartset):
+    """Duck type for baseset class which iterates lazily over the revisions in
+    the subset and contains a function which tests for membership in the
+    revset
+    """
+    def __init__(self, subset, condition=lambda x: True, condrepr=None):
+        """
+        condition: a function that decide whether a revision in the subset
+                   belongs to the revset or not.
+        condrepr: a tuple of (format, obj, ...), a function or an object that
+                  provides a printable representation of the given condition.
+        """
+        self._subset = subset
+        self._condition = condition
+        self._condrepr = condrepr
+
+    def __contains__(self, x):
+        return x in self._subset and self._condition(x)
+
+    def __iter__(self):
+        return self._iterfilter(self._subset)
+
+    def _iterfilter(self, it):
+        cond = self._condition
+        for x in it:
+            if cond(x):
+                yield x
+
+    @property
+    def fastasc(self):
+        it = self._subset.fastasc
+        if it is None:
+            return None
+        return lambda: self._iterfilter(it())
+
+    @property
+    def fastdesc(self):
+        it = self._subset.fastdesc
+        if it is None:
+            return None
+        return lambda: self._iterfilter(it())
+
+    def __nonzero__(self):
+        fast = None
+        candidates = [self.fastasc if self.isascending() else None,
+                      self.fastdesc if self.isdescending() else None,
+                      self.fastasc,
+                      self.fastdesc]
+        for candidate in candidates:
+            if candidate is not None:
+                fast = candidate
+                break
+
+        if fast is not None:
+            it = fast()
+        else:
+            it = self
+
+        for r in it:
+            return True
+        return False
+
+    def __len__(self):
+        # Basic implementation to be changed in future patches.
+        # until this gets improved, we use generator expression
+        # here, since list comprehensions are free to call __len__ again
+        # causing infinite recursion
+        l = baseset(r for r in self)
+        return len(l)
+
+    def sort(self, reverse=False):
+        self._subset.sort(reverse=reverse)
+
+    def reverse(self):
+        self._subset.reverse()
+
+    def isascending(self):
+        return self._subset.isascending()
+
+    def isdescending(self):
+        return self._subset.isdescending()
+
+    def istopo(self):
+        return self._subset.istopo()
+
+    def first(self):
+        for x in self:
+            return x
+        return None
+
+    def last(self):
+        it = None
+        if self.isascending():
+            it = self.fastdesc
+        elif self.isdescending():
+            it = self.fastasc
+        if it is not None:
+            for x in it():
+                return x
+            return None #empty case
+        else:
+            x = None
+            for x in self:
+                pass
+            return x
+
+    def __repr__(self):
+        xs = [repr(self._subset)]
+        s = _formatsetrepr(self._condrepr)
+        if s:
+            xs.append(s)
+        return '<%s %s>' % (type(self).__name__, ', '.join(xs))
+
+def _iterordered(ascending, iter1, iter2):
+    """produce an ordered iteration from two iterators with the same order
+
+    The ascending is used to indicated the iteration direction.
+    """
+    choice = max
+    if ascending:
+        choice = min
+
+    val1 = None
+    val2 = None
+    try:
+        # Consume both iterators in an ordered way until one is empty
+        while True:
+            if val1 is None:
+                val1 = next(iter1)
+            if val2 is None:
+                val2 = next(iter2)
+            n = choice(val1, val2)
+            yield n
+            if val1 == n:
+                val1 = None
+            if val2 == n:
+                val2 = None
+    except StopIteration:
+        # Flush any remaining values and consume the other one
+        it = iter2
+        if val1 is not None:
+            yield val1
+            it = iter1
+        elif val2 is not None:
+            # might have been equality and both are empty
+            yield val2
+        for val in it:
+            yield val
+
+class addset(abstractsmartset):
+    """Represent the addition of two sets
+
+    Wrapper structure for lazily adding two structures without losing much
+    performance on the __contains__ method
+
+    If the ascending attribute is set, that means the two structures are
+    ordered in either an ascending or descending way. Therefore, we can add
+    them maintaining the order by iterating over both at the same time
+
+    >>> xs = baseset([0, 3, 2])
+    >>> ys = baseset([5, 2, 4])
+
+    >>> rs = addset(xs, ys)
+    >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last()
+    (True, True, False, True, 0, 4)
+    >>> rs = addset(xs, baseset([]))
+    >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last()
+    (True, True, False, 0, 2)
+    >>> rs = addset(baseset([]), baseset([]))
+    >>> bool(rs), 0 in rs, rs.first(), rs.last()
+    (False, False, None, None)
+
+    iterate unsorted:
+    >>> rs = addset(xs, ys)
+    >>> # (use generator because pypy could call len())
+    >>> list(x for x in rs)  # without _genlist
+    [0, 3, 2, 5, 4]
+    >>> assert not rs._genlist
+    >>> len(rs)
+    5
+    >>> [x for x in rs]  # with _genlist
+    [0, 3, 2, 5, 4]
+    >>> assert rs._genlist
+
+    iterate ascending:
+    >>> rs = addset(xs, ys, ascending=True)
+    >>> # (use generator because pypy could call len())
+    >>> list(x for x in rs), list(x for x in rs.fastasc())  # without _asclist
+    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    >>> assert not rs._asclist
+    >>> len(rs)
+    5
+    >>> [x for x in rs], [x for x in rs.fastasc()]
+    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    >>> assert rs._asclist
+
+    iterate descending:
+    >>> rs = addset(xs, ys, ascending=False)
+    >>> # (use generator because pypy could call len())
+    >>> list(x for x in rs), list(x for x in rs.fastdesc())  # without _asclist
+    ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
+    >>> assert not rs._asclist
+    >>> len(rs)
+    5
+    >>> [x for x in rs], [x for x in rs.fastdesc()]
+    ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
+    >>> assert rs._asclist
+
+    iterate ascending without fastasc:
+    >>> rs = addset(xs, generatorset(ys), ascending=True)
+    >>> assert rs.fastasc is None
+    >>> [x for x in rs]
+    [0, 2, 3, 4, 5]
+
+    iterate descending without fastdesc:
+    >>> rs = addset(generatorset(xs), ys, ascending=False)
+    >>> assert rs.fastdesc is None
+    >>> [x for x in rs]
+    [5, 4, 3, 2, 0]
+    """
+    def __init__(self, revs1, revs2, ascending=None):
+        self._r1 = revs1
+        self._r2 = revs2
+        self._iter = None
+        self._ascending = ascending
+        self._genlist = None
+        self._asclist = None
+
+    def __len__(self):
+        return len(self._list)
+
+    def __nonzero__(self):
+        return bool(self._r1) or bool(self._r2)
+
+    @util.propertycache
+    def _list(self):
+        if not self._genlist:
+            self._genlist = baseset(iter(self))
+        return self._genlist
+
+    def __iter__(self):
+        """Iterate over both collections without repeating elements
+
+        If the ascending attribute is not set, iterate over the first one and
+        then over the second one checking for membership on the first one so we
+        dont yield any duplicates.
+
+        If the ascending attribute is set, iterate over both collections at the
+        same time, yielding only one value at a time in the given order.
+        """
+        if self._ascending is None:
+            if self._genlist:
+                return iter(self._genlist)
+            def arbitraryordergen():
+                for r in self._r1:
+                    yield r
+                inr1 = self._r1.__contains__
+                for r in self._r2:
+                    if not inr1(r):
+                        yield r
+            return arbitraryordergen()
+        # try to use our own fast iterator if it exists
+        self._trysetasclist()
+        if self._ascending:
+            attr = 'fastasc'
+        else:
+            attr = 'fastdesc'
+        it = getattr(self, attr)
+        if it is not None:
+            return it()
+        # maybe half of the component supports fast
+        # get iterator for _r1
+        iter1 = getattr(self._r1, attr)
+        if iter1 is None:
+            # let's avoid side effect (not sure it matters)
+            iter1 = iter(sorted(self._r1, reverse=not self._ascending))
+        else:
+            iter1 = iter1()
+        # get iterator for _r2
+        iter2 = getattr(self._r2, attr)
+        if iter2 is None:
+            # let's avoid side effect (not sure it matters)
+            iter2 = iter(sorted(self._r2, reverse=not self._ascending))
+        else:
+            iter2 = iter2()
+        return _iterordered(self._ascending, iter1, iter2)
+
+    def _trysetasclist(self):
+        """populate the _asclist attribute if possible and necessary"""
+        if self._genlist is not None and self._asclist is None:
+            self._asclist = sorted(self._genlist)
+
+    @property
+    def fastasc(self):
+        self._trysetasclist()
+        if self._asclist is not None:
+            return self._asclist.__iter__
+        iter1 = self._r1.fastasc
+        iter2 = self._r2.fastasc
+        if None in (iter1, iter2):
+            return None
+        return lambda: _iterordered(True, iter1(), iter2())
+
+    @property
+    def fastdesc(self):
+        self._trysetasclist()
+        if self._asclist is not None:
+            return self._asclist.__reversed__
+        iter1 = self._r1.fastdesc
+        iter2 = self._r2.fastdesc
+        if None in (iter1, iter2):
+            return None
+        return lambda: _iterordered(False, iter1(), iter2())
+
+    def __contains__(self, x):
+        return x in self._r1 or x in self._r2
+
+    def sort(self, reverse=False):
+        """Sort the added set
+
+        For this we use the cached list with all the generated values and if we
+        know they are ascending or descending we can sort them in a smart way.
+        """
+        self._ascending = not reverse
+
+    def isascending(self):
+        return self._ascending is not None and self._ascending
+
+    def isdescending(self):
+        return self._ascending is not None and not self._ascending
+
+    def istopo(self):
+        # not worth the trouble asserting if the two sets combined are still
+        # in topographical order. Use the sort() predicate to explicitly sort
+        # again instead.
+        return False
+
+    def reverse(self):
+        if self._ascending is None:
+            self._list.reverse()
+        else:
+            self._ascending = not self._ascending
+
+    def first(self):
+        for x in self:
+            return x
+        return None
+
+    def last(self):
+        self.reverse()
+        val = self.first()
+        self.reverse()
+        return val
+
+    def __repr__(self):
+        d = {None: '', False: '-', True: '+'}[self._ascending]
+        return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2)
+
+class generatorset(abstractsmartset):
+    """Wrap a generator for lazy iteration
+
+    Wrapper structure for generators that provides lazy membership and can
+    be iterated more than once.
+    When asked for membership it generates values until either it finds the
+    requested one or has gone through all the elements in the generator
+    """
+    def __init__(self, gen, iterasc=None):
+        """
+        gen: a generator producing the values for the generatorset.
+        """
+        self._gen = gen
+        self._asclist = None
+        self._cache = {}
+        self._genlist = []
+        self._finished = False
+        self._ascending = True
+        if iterasc is not None:
+            if iterasc:
+                self.fastasc = self._iterator
+                self.__contains__ = self._asccontains
+            else:
+                self.fastdesc = self._iterator
+                self.__contains__ = self._desccontains
+
+    def __nonzero__(self):
+        # Do not use 'for r in self' because it will enforce the iteration
+        # order (default ascending), possibly unrolling a whole descending
+        # iterator.
+        if self._genlist:
+            return True
+        for r in self._consumegen():
+            return True
+        return False
+
+    def __contains__(self, x):
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+
+        self._cache[x] = False
+        return False
+
+    def _asccontains(self, x):
+        """version of contains optimised for ascending generator"""
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l > x:
+                break
+
+        self._cache[x] = False
+        return False
+
+    def _desccontains(self, x):
+        """version of contains optimised for descending generator"""
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l < x:
+                break
+
+        self._cache[x] = False
+        return False
+
+    def __iter__(self):
+        if self._ascending:
+            it = self.fastasc
+        else:
+            it = self.fastdesc
+        if it is not None:
+            return it()
+        # we need to consume the iterator
+        for x in self._consumegen():
+            pass
+        # recall the same code
+        return iter(self)
+
+    def _iterator(self):
+        if self._finished:
+            return iter(self._genlist)
+
+        # We have to use this complex iteration strategy to allow multiple
+        # iterations at the same time. We need to be able to catch revision
+        # removed from _consumegen and added to genlist in another instance.
+        #
+        # Getting rid of it would provide an about 15% speed up on this
+        # iteration.
+        genlist = self._genlist
+        nextrev = self._consumegen().next
+        _len = len # cache global lookup
+        def gen():
+            i = 0
+            while True:
+                if i < _len(genlist):
+                    yield genlist[i]
+                else:
+                    yield nextrev()
+                i += 1
+        return gen()
+
+    def _consumegen(self):
+        cache = self._cache
+        genlist = self._genlist.append
+        for item in self._gen:
+            cache[item] = True
+            genlist(item)
+            yield item
+        if not self._finished:
+            self._finished = True
+            asc = self._genlist[:]
+            asc.sort()
+            self._asclist = asc
+            self.fastasc = asc.__iter__
+            self.fastdesc = asc.__reversed__
+
+    def __len__(self):
+        for x in self._consumegen():
+            pass
+        return len(self._genlist)
+
+    def sort(self, reverse=False):
+        self._ascending = not reverse
+
+    def reverse(self):
+        self._ascending = not self._ascending
+
+    def isascending(self):
+        return self._ascending
+
+    def isdescending(self):
+        return not self._ascending
+
+    def istopo(self):
+        # not worth the trouble asserting if the two sets combined are still
+        # in topographical order. Use the sort() predicate to explicitly sort
+        # again instead.
+        return False
+
+    def first(self):
+        if self._ascending:
+            it = self.fastasc
+        else:
+            it = self.fastdesc
+        if it is None:
+            # we need to consume all and try again
+            for x in self._consumegen():
+                pass
+            return self.first()
+        return next(it(), None)
+
+    def last(self):
+        if self._ascending:
+            it = self.fastdesc
+        else:
+            it = self.fastasc
+        if it is None:
+            # we need to consume all and try again
+            for x in self._consumegen():
+                pass
+            return self.first()
+        return next(it(), None)
+
+    def __repr__(self):
+        d = {False: '-', True: '+'}[self._ascending]
+        return '<%s%s>' % (type(self).__name__, d)
+
+class spanset(abstractsmartset):
+    """Duck type for baseset class which represents a range of revisions and
+    can work lazily and without having all the range in memory
+
+    Note that spanset(x, y) behave almost like xrange(x, y) except for two
+    notable points:
+    - when x < y it will be automatically descending,
+    - revision filtered with this repoview will be skipped.
+
+    """
+    def __init__(self, repo, start=0, end=None):
+        """
+        start: first revision included the set
+               (default to 0)
+        end:   first revision excluded (last+1)
+               (default to len(repo)
+
+        Spanset will be descending if `end` < `start`.
+        """
+        if end is None:
+            end = len(repo)
+        self._ascending = start <= end
+        if not self._ascending:
+            start, end = end + 1, start +1
+        self._start = start
+        self._end = end
+        self._hiddenrevs = repo.changelog.filteredrevs
+
+    def sort(self, reverse=False):
+        self._ascending = not reverse
+
+    def reverse(self):
+        self._ascending = not self._ascending
+
+    def istopo(self):
+        # not worth the trouble asserting if the two sets combined are still
+        # in topographical order. Use the sort() predicate to explicitly sort
+        # again instead.
+        return False
+
+    def _iterfilter(self, iterrange):
+        s = self._hiddenrevs
+        for r in iterrange:
+            if r not in s:
+                yield r
+
+    def __iter__(self):
+        if self._ascending:
+            return self.fastasc()
+        else:
+            return self.fastdesc()
+
+    def fastasc(self):
+        iterrange = xrange(self._start, self._end)
+        if self._hiddenrevs:
+            return self._iterfilter(iterrange)
+        return iter(iterrange)
+
+    def fastdesc(self):
+        iterrange = xrange(self._end - 1, self._start - 1, -1)
+        if self._hiddenrevs:
+            return self._iterfilter(iterrange)
+        return iter(iterrange)
+
+    def __contains__(self, rev):
+        hidden = self._hiddenrevs
+        return ((self._start <= rev < self._end)
+                and not (hidden and rev in hidden))
+
+    def __nonzero__(self):
+        for r in self:
+            return True
+        return False
+
+    def __len__(self):
+        if not self._hiddenrevs:
+            return abs(self._end - self._start)
+        else:
+            count = 0
+            start = self._start
+            end = self._end
+            for rev in self._hiddenrevs:
+                if (end < rev <= start) or (start <= rev < end):
+                    count += 1
+            return abs(self._end - self._start) - count
+
+    def isascending(self):
+        return self._ascending
+
+    def isdescending(self):
+        return not self._ascending
+
+    def first(self):
+        if self._ascending:
+            it = self.fastasc
+        else:
+            it = self.fastdesc
+        for x in it():
+            return x
+        return None
+
+    def last(self):
+        if self._ascending:
+            it = self.fastdesc
+        else:
+            it = self.fastasc
+        for x in it():
+            return x
+        return None
+
+    def __repr__(self):
+        d = {False: '-', True: '+'}[self._ascending]
+        return '<%s%s %d:%d>' % (type(self).__name__, d,
+                                 self._start, self._end - 1)
+
+class fullreposet(spanset):
+    """a set containing all revisions in the repo
+
+    This class exists to host special optimization and magic to handle virtual
+    revisions such as "null".
+    """
+
+    def __init__(self, repo):
+        super(fullreposet, self).__init__(repo)
+
+    def __and__(self, other):
+        """As self contains the whole repo, all of the other set should also be
+        in self. Therefore `self & other = other`.
+
+        This boldly assumes the other contains valid revs only.
+        """
+        # other not a smartset, make is so
+        if not util.safehasattr(other, 'isascending'):
+            # filter out hidden revision
+            # (this boldly assumes all smartset are pure)
+            #
+            # `other` was used with "&", let's assume this is a set like
+            # object.
+            other = baseset(other - self._hiddenrevs)
+
+        other.sort(reverse=self.isdescending())
+        return other
+
+def prettyformat(revs):
+    lines = []
+    rs = repr(revs)
+    p = 0
+    while p < len(rs):
+        q = rs.find('<', p + 1)
+        if q < 0:
+            q = len(rs)
+        l = rs.count('<', 0, p) - rs.count('>', 0, p)
+        assert l >= 0
+        lines.append((l, rs[p:q].rstrip()))
+        p = q
+    return '\n'.join('  ' * l + s for l, s in lines)
diff -r f3807a135e43 -r 455677a7667f mercurial/store.py
--- a/mercurial/store.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/store.py	Mon Feb 13 09:44:16 2017 -0800
@@ -101,7 +101,7 @@
     e = '_'
     if pycompat.ispy3:
         xchr = lambda x: bytes([x])
-        asciistr = bytes(xrange(127))
+        asciistr = [bytes(a) for a in range(127)]
     else:
         xchr = chr
         asciistr = map(chr, xrange(127))
diff -r f3807a135e43 -r 455677a7667f mercurial/util.py
--- a/mercurial/util.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/util.py	Mon Feb 13 09:44:16 2017 -0800
@@ -63,9 +63,21 @@
 urlreq = pycompat.urlreq
 xmlrpclib = pycompat.xmlrpclib
 
+def isatty(fp):
+    try:
+        return fp.isatty()
+    except AttributeError:
+        return False
+
+# glibc determines buffering on first write to stdout - if we replace a TTY
+# destined stdout with a pipe destined stdout (e.g. pager), we want line
+# buffering
+if isatty(stdout):
+    stdout = os.fdopen(stdout.fileno(), 'wb', 1)
+
 if pycompat.osname == 'nt':
     from . import windows as platform
-    stdout = platform.winstdout(pycompat.stdout)
+    stdout = platform.winstdout(stdout)
 else:
     from . import posix as platform
 
@@ -2750,12 +2762,6 @@
     u.user = u.passwd = None
     return str(u)
 
-def isatty(fp):
-    try:
-        return fp.isatty()
-    except AttributeError:
-        return False
-
 timecount = unitcountfn(
     (1, 1e3, _('%.0f s')),
     (100, 1, _('%.1f s')),
diff -r f3807a135e43 -r 455677a7667f mercurial/verify.py
--- a/mercurial/verify.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/mercurial/verify.py	Mon Feb 13 09:44:16 2017 -0800
@@ -18,6 +18,7 @@
 from . import (
     error,
     revlog,
+    scmutil,
     util,
 )
 
@@ -32,21 +33,13 @@
         f = f.replace('//', '/')
     return f
 
-def _validpath(repo, path):
-    """Returns False if a path should NOT be treated as part of a repo.
-
-    For all in-core cases, this returns True, as we have no way for a
-    path to be mentioned in the history but not actually be
-    relevant. For narrow clones, this is important because many
-    filelogs will be missing, and changelog entries may mention
-    modified files that are outside the narrow scope.
-    """
-    return True
-
 class verifier(object):
-    def __init__(self, repo):
+    # The match argument is always None in hg core, but e.g. the narrowhg
+    # extension will pass in a matcher here.
+    def __init__(self, repo, match=None):
         self.repo = repo.unfiltered()
         self.ui = repo.ui
+        self.match = match or scmutil.matchall(repo)
         self.badrevs = set()
         self.errors = 0
         self.warnings = 0
@@ -170,6 +163,7 @@
     def _verifychangelog(self):
         ui = self.ui
         repo = self.repo
+        match = self.match
         cl = repo.changelog
 
         ui.status(_("checking changesets\n"))
@@ -189,7 +183,7 @@
                     mflinkrevs.setdefault(changes[0], []).append(i)
                     self.refersmf = True
                 for f in changes[3]:
-                    if _validpath(repo, f):
+                    if match(f):
                         filelinkrevs.setdefault(_normpath(f), []).append(i)
             except Exception as inst:
                 self.refersmf = True
@@ -201,6 +195,7 @@
                         progress=None):
         repo = self.repo
         ui = self.ui
+        match = self.match
         mfl = self.repo.manifestlog
         mf = mfl._revlog.dirlog(dir)
 
@@ -243,12 +238,14 @@
                     elif f == "/dev/null":  # ignore this in very old repos
                         continue
                     fullpath = dir + _normpath(f)
-                    if not _validpath(repo, fullpath):
-                        continue
                     if fl == 't':
+                        if not match.visitdir(fullpath):
+                            continue
                         subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                             fn, []).append(lr)
                     else:
+                        if not match(fullpath):
+                            continue
                         filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self.exc(lr, _("reading delta %s") % short(n), inst, label)
diff -r f3807a135e43 -r 455677a7667f tests/run-tests.py
--- a/tests/run-tests.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/run-tests.py	Mon Feb 13 09:44:16 2017 -0800
@@ -114,15 +114,20 @@
 
 def checkportisavailable(port):
     """return true if a port seems free to bind on localhost"""
-    try:
-        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        s.bind(('localhost', port))
-        s.close()
-        return True
-    except socket.error as exc:
-        if not exc.errno == errno.EADDRINUSE:
-            raise
-        return False
+    families = [getattr(socket, i, None)
+                for i in ('AF_INET', 'AF_INET6')
+                if getattr(socket, i, None) is not None]
+    for family in families:
+        try:
+            s = socket.socket(family, socket.SOCK_STREAM)
+            s.bind(('localhost', port))
+            s.close()
+            return True
+        except socket.error as exc:
+            if exc.errno not in (errno.EADDRINUSE, errno.EADDRNOTAVAIL,
+                                 errno.EPROTONOSUPPORT):
+                raise
+    return False
 
 closefds = os.name == 'posix'
 def Popen4(cmd, wd, timeout, env=None):
diff -r f3807a135e43 -r 455677a7667f tests/test-check-help.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-check-help.t	Mon Feb 13 09:44:16 2017 -0800
@@ -0,0 +1,25 @@
+#require test-repo
+
+  $ . "$TESTDIR/helpers-testrepo.sh"
+
+  $ cat <<'EOF' > scanhelptopics.py
+  > from __future__ import absolute_import, print_function
+  > import re
+  > import sys
+  > topics = set()
+  > topicre = re.compile(r':hg:`help ([a-z0-9\-.]+)`')
+  > for fname in sys.argv:
+  >     with open(fname) as f:
+  >         topics.update(m.group(1) for m in topicre.finditer(f.read()))
+  > for s in sorted(topics):
+  >     print(s)
+  > EOF
+
+  $ cd "$TESTDIR"/..
+
+Check if ":hg:`help TOPIC`" is valid:
+(use "xargs -n1 -t" to see which help commands are executed)
+
+  $ hg files 'glob:{hgext,mercurial}/**/*.py' \
+  > | xargs python "$TESTTMP/scanhelptopics.py" \
+  > | xargs -n1 hg help > /dev/null
diff -r f3807a135e43 -r 455677a7667f tests/test-check-py3-compat.t
--- a/tests/test-check-py3-compat.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-check-py3-compat.t	Mon Feb 13 09:44:16 2017 -0800
@@ -7,7 +7,6 @@
   contrib/python-zstandard/setup.py not using absolute_import
   contrib/python-zstandard/setup_zstd.py not using absolute_import
   contrib/python-zstandard/tests/common.py not using absolute_import
-  contrib/python-zstandard/tests/test_cffi.py not using absolute_import
   contrib/python-zstandard/tests/test_compressor.py not using absolute_import
   contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
   contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
diff -r f3807a135e43 -r 455677a7667f tests/test-check-pyflakes.t
--- a/tests/test-check-pyflakes.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-check-pyflakes.t	Mon Feb 13 09:44:16 2017 -0800
@@ -7,9 +7,8 @@
 (skipping binary file random-seed)
 
   $ hg locate 'set:**.py or grep("^#!.*python")' -X hgext/fsmonitor/pywatchman \
-  > -X mercurial/pycompat.py \
+  > -X mercurial/pycompat.py -X contrib/python-zstandard \
   > 2>/dev/null \
   > | xargs pyflakes 2>/dev/null | "$TESTDIR/filterpyflakes.py"
-  contrib/python-zstandard/tests/test_data_structures.py:107: local variable 'size' is assigned to but never used
   tests/filterpyflakes.py:39: undefined name 'undefinedname'
   
diff -r f3807a135e43 -r 455677a7667f tests/test-doctest.py
--- a/tests/test-doctest.py	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-doctest.py	Mon Feb 13 09:44:16 2017 -0800
@@ -29,6 +29,7 @@
 testmod('mercurial.pathutil')
 testmod('mercurial.parser')
 testmod('mercurial.revset')
+testmod('mercurial.smartset')
 testmod('mercurial.store')
 testmod('mercurial.subrepo')
 testmod('mercurial.templatefilters')
diff -r f3807a135e43 -r 455677a7667f tests/test-help.t
--- a/tests/test-help.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-help.t	Mon Feb 13 09:44:16 2017 -0800
@@ -1547,11 +1547,11 @@
          "default:pushurl" should be used instead.
   
   $ hg help glossary.mcguffin
-  abort: help section not found
+  abort: help section not found: glossary.mcguffin
   [255]
 
   $ hg help glossary.mc.guffin
-  abort: help section not found
+  abort: help section not found: glossary.mc.guffin
   [255]
 
   $ hg help template.files
diff -r f3807a135e43 -r 455677a7667f tests/test-mq.t
--- a/tests/test-mq.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-mq.t	Mon Feb 13 09:44:16 2017 -0800
@@ -25,7 +25,7 @@
   Known patches are represented as patch files in the .hg/patches directory.
   Applied patches are both patch files and changesets.
   
-  Common tasks (use 'hg help command' for more details):
+  Common tasks (use 'hg help COMMAND' for more details):
   
     create new patch                          qnew
     import existing patch                     qimport
diff -r f3807a135e43 -r 455677a7667f tests/test-revset.t
--- a/tests/test-revset.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-revset.t	Mon Feb 13 09:44:16 2017 -0800
@@ -40,6 +40,7 @@
   >     cmdutil,
   >     node as nodemod,
   >     revset,
+  >     smartset,
   > )
   > cmdtable = {}
   > command = cmdutil.command(cmdtable)
@@ -59,7 +60,7 @@
   >     func = revset.match(ui, expr, repo)
   >     revs = func(repo)
   >     if ui.verbose:
-  >         ui.note("* set:\n", revset.prettyformatset(revs), "\n")
+  >         ui.note("* set:\n", smartset.prettyformat(revs), "\n")
   >     for c in revs:
   >         ui.write("%s\n" % c)
   > EOF
diff -r f3807a135e43 -r 455677a7667f tests/test-update-branches.t
--- a/tests/test-update-branches.t	Fri Feb 10 18:20:58 2017 +0100
+++ b/tests/test-update-branches.t	Mon Feb 13 09:44:16 2017 -0800
@@ -177,6 +177,28 @@
 
   $ cd ..
 
+Test updating to null revision
+
+  $ hg init null-repo
+  $ cd null-repo
+  $ echo a > a
+  $ hg add a
+  $ hg ci -m a
+  $ hg up -qC 0
+  $ echo b > b
+  $ hg add b
+  $ hg up null
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ hg st
+  A b
+  $ hg up -q 0
+  $ hg st
+  A b
+  $ hg up -qC null
+  $ hg st
+  ? b
+  $ cd ..
+
 Test updating with closed head
 ---------------------------------------------------------------------