[zlib][intel] Implement QAT hardware acceleration code path

QAT is a hardware accelerator developed by Intel to speed up compression and cryptographic algorithms. This patch adds support for leveraging that hardware acceleration in zlib for
compression operations.

It adds a new path for deflate_fast and deflate_slow to perform compression with QAT using a helper library called qatzpp (included in the patch).
We added a new member in the struct deflate_state to keep a context for QAT operations (i.e. qat_s) that is initialized when deflateInit() is called.

When a valid QAT context exists (i.e. qat_s is not NULL), all deflate_fast and deflate_slow calls are delegated to qat_deflate_step in contrib/qat/deflate_qat.cpp.
This bypasses the pending_buf and window buffers of deflate_state, which will remain empty for as long as QAT is being used.

The average performance gain was 5.9x (i.e. +500%) for the snappy data corpus (plus linux kernel tarball and javascript corpus), with up to 4x for HTML4, 12x for linux kernel sources and 14x for Javascript corpus.
The average compression ratio loss is ~0.6% for the same data corpus.

The system environment requirements are:

* A kernel with the QAT 4XXX module (we performed tests with kernels >=6.1.0)
* the packages qatlib and qatlib-devel installed in the system. (CentOS 9)
* The system configuration file /etc/sysconfig/qat with ServicesEnabled=dc [1]
* The systemd service qat.service active and running.
* A 'qat' group and users in that group [2] (these users will have permissions to run QAT)
* The system configuration file /etc/security/limits.conf with a memlock for the user(s) that will run QAT [3]
   e. g. gustavoa   -   memlock   500000

To enable the QAT hardware-accelerated fast path, simply build chromium-zlib with the CMake option ENABLE_INTEL_QAT_COMPRESSION.

[1] https://github.com/intel/qatlib/blob/main/INSTALL#L409
[2] https://github.com/intel/qatlib/blob/main/INSTALL#L273
[3] https://github.com/intel/qatlib/blob/main/INSTALL#L284

Bug: 40267458

Signed-off-by: Gustavo A Espinoza <gustavo.adolfo.espinoza.quintero@intel.com>
Change-Id: Idb0c380d39ca72669b1bccb7a6d993894b62b592
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5419776
Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
Reviewed-by: Hans Wennborg <hans@chromium.org>
Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1325829}
NOKEYCHECK=True
GitOrigin-RevId: ecb289f4169b502fb84c2205343857b1ce385b31
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66f7d04..59d77c3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@
 option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF)
 option(ENABLE_SIMD_AVX512 "Enable SIMD AXV512 optimizations" OFF)
 option(USE_ZLIB_RABIN_KARP_HASH "Enable bitstream compatibility with canonical zlib" OFF)
+option(ENABLE_INTEL_QAT_COMPRESSION "Enable Intel Quick Assist Technology use for compression" OFF)
 option(BUILD_UNITTESTS "Enable standalone unit tests build" OFF)
 option(BUILD_MINIZIP_BIN "Enable building minzip_bin tool" OFF)
 option(BUILD_ZPIPE "Enable building zpipe tool" OFF)
@@ -228,6 +229,22 @@
   endif()
 endif()
 
+# Optional Intel QAT backend: extra sources, header path and feature macro.
+if (ENABLE_INTEL_QAT_COMPRESSION)
+    list(APPEND ZLIB_SRCS
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/deflate_qat.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/io_buffers.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/memory.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_buffer_list.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_instance.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/session.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_task.cpp)
+    # TODO(gustavoa): Include the qatzpp headers without tripping the presubmit check.
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp)
+    add_compile_definitions(QAT_COMPRESSION_ENABLED)
+endif()
+
 # parse the full version number from zlib.h and include in ZLIB_FULL_VERSION
 file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents)
 string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*"
@@ -254,6 +271,15 @@
 set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL)
 set_target_properties(zlib PROPERTIES SOVERSION 1)
 
+if (ENABLE_INTEL_QAT_COMPRESSION)
+    target_include_directories(zlib PUBLIC ${QATZPP_INCLUDE_DIRS})
+    target_link_libraries(zlib ${QATZPP_LIBRARY})
+    target_link_libraries(zlib qat)
+    target_include_directories(zlibstatic PUBLIC ${QATZPP_INCLUDE_DIRS})
+    target_link_libraries(zlibstatic ${QATZPP_LIBRARY})
+    target_link_libraries(zlibstatic qat)
+endif()
+
 if(NOT CYGWIN)
     # This property causes shared libraries on Linux to have the full version
     # encoded into their final filename.  We disable this on Cygwin because
diff --git a/contrib/qat/deflate_qat.cpp b/contrib/qat/deflate_qat.cpp
new file mode 100644
index 0000000..bfe4547
--- /dev/null
+++ b/contrib/qat/deflate_qat.cpp
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "deflate_qat.h"
+#include "deflate.h"
+
+#include "session.hpp"
+#include "qat_instance.hpp"
+#include "qat_buffer_list.hpp"
+#include "qat.hpp"
+
+#include <memory>
+
+/*
+*   TODO(gustavoa): Make the input size adjustable from the memlevel
+*   attribute on deflateInit.
+*/
+static constexpr size_t kInputSize = 1024 * 1024;
+
+/* QAT instances made available by the library. */
+static std::vector<std::shared_ptr<qat::Instance>> qat_instances;
+
+/*
+*   TODO(gustavoa): Verify if the ordering of the struct fields won't create
+*   unnecessary holes in the structure that requires extraneous padding.
+*/
+struct qat_deflate {
+    std::unique_ptr<qat::DeflateSession> qat_session;
+
+    /*  QAT requires contiguous physical pages. Cannot be allocated using
+    *   malloc/new.
+    */
+    uint8_t *input_buffer;
+    uint8_t *output_buffer;
+
+    /* Pointer to the next byte in the output buffer. */
+    uint8_t *pending_out;
+
+    unsigned input_buffer_size;
+    unsigned output_buffer_size;
+
+    unsigned pending_in_count;
+    unsigned pending_out_count;
+};
+
+/* Creates a deflate session on the first QAT instance. `wrap` selects the
+ * checksum the session is configured with: 1 -> Adler-32, 2 -> CRC-32, any
+ * other value -> none. The session is requested on NUMA node 0.
+ */
+static std::unique_ptr<qat::DeflateSession> qat_create_session(int level, int wrap)
+{
+    CpaDcChecksum checksum_kind;
+
+    if (wrap == 1) {
+        checksum_kind = CPA_DC_ADLER32;
+    } else if (wrap == 2) {
+        checksum_kind = CPA_DC_CRC32;
+    } else {
+        checksum_kind = CPA_DC_NONE;
+    }
+
+    const CpaDcCompLvl comp_level = (CpaDcCompLvl)level;
+    return std::make_unique<qat::DeflateSession>(
+        qat_instances[0], comp_level, checksum_kind, 0);
+}
+
+
+int qat_deflate_init()
+{
+    return (qat::Initialize()) ? Z_ERRNO : Z_OK;
+}
+
+/* Builds the per-stream QAT state: one session plus the input and output
+ * staging buffers. Returns nullptr (after releasing anything already
+ * allocated) when no instance can be started or a buffer cannot be obtained.
+ */
+struct qat_deflate* qat_deflate_state_init(int level, int wrap)
+{
+    if (qat_instances.empty()) {
+        qat_instances = qat::Instance::Create();
+    }
+    /* TODO(gustavoa): Find a way to utilize all the available instances for the same
+     * process.
+     */
+    if (qat_instances.empty() || qat_instances[0]->Start() != 0) {
+        return nullptr;
+    }
+
+    struct qat_deflate *qat_deflate = new struct qat_deflate;
+    qat_deflate->qat_session = qat_create_session(level, wrap);
+
+    qat_deflate->input_buffer_size = kInputSize;
+    qat_deflate->input_buffer = qat::AllocBlockArray<uint8_t>(kInputSize, 0);
+    qat_deflate->output_buffer_size =
+        qat_deflate->qat_session->GetDeflateBound(qat_deflate->input_buffer_size);
+    qat_deflate->pending_out = qat_deflate->output_buffer =
+        qat::AllocBlockArray<uint8_t>(qat_deflate->output_buffer_size, 0);
+    qat_deflate->pending_in_count = qat_deflate->pending_out_count = 0;
+
+    if (!qat_deflate->input_buffer || !qat_deflate->output_buffer) {
+        if (qat_deflate->input_buffer) qat::Free(qat_deflate->input_buffer);
+        if (qat_deflate->output_buffer) qat::Free(qat_deflate->output_buffer);
+        delete qat_deflate;
+        return nullptr;
+    }
+
+    return qat_deflate;
+}
+
+/* Moves up to `size` bytes from strm->next_in to the tail of the QAT input
+ * buffer, advancing the stream's input cursor/counters and the pending input
+ * count. Returns the number of bytes moved (0 when nothing can be moved).
+ */
+static unsigned qat_read_buf(z_streamp strm, struct qat_deflate* qat, unsigned size)
+{
+    const unsigned take = (strm->avail_in > size) ? size : strm->avail_in;
+    if (take == 0) {
+        return 0;
+    }
+
+    /* Append after any bytes that are still waiting to be compressed. */
+    uint8_t *tail = qat->input_buffer + qat->pending_in_count;
+    zmemcpy(tail, strm->next_in, take);
+
+    /* Account for the moved bytes on both sides of the copy. */
+    strm->next_in += take;
+    strm->avail_in -= take;
+    strm->total_in += take;
+    qat->pending_in_count += take;
+
+    return take;
+}
+
+/* Hands as much pending compressed output as fits to strm->next_out; once
+ * nothing is left, the pending cursor rewinds to the start of the buffer. */
+void qat_flush_pending(deflate_state* s)
+{
+    struct qat_deflate* qat = s->qat_s;
+    z_streamp strm = s->strm;
+    const unsigned chunk = (qat->pending_out_count > strm->avail_out)
+        ? strm->avail_out : qat->pending_out_count;
+    if (chunk == 0) return;
+
+    zmemcpy(strm->next_out, qat->pending_out, chunk);
+    strm->next_out += chunk;
+    strm->avail_out -= chunk;
+    strm->total_out += chunk;
+    qat->pending_out += chunk;
+    qat->pending_out_count -= chunk;
+
+    if (qat->pending_out_count == 0) {
+        qat->pending_out = qat->output_buffer;
+    }
+}
+
+/* Compresses the pending input with a single QAT request, updates
+ * strm->adler when s->wrap is 1 or 2, and keeps any unconsumed input at the
+ * front of the input buffer. Always returns 0. */
+static int qat_compress_pending(deflate_state*s, int flush)
+{
+    struct qat_deflate* qat = s->qat_s;
+    uint32_t metadata_size = 0;  /* defined value even if the query below fails */
+
+    /* TODO(gustavoa): find a way to make qatzpp setup this number internally. */
+    cpaDcBufferListGetMetaSize(qat->qat_session->getInstance()->GetHandle(), 1, &metadata_size);
+
+    auto job = qat->qat_session->Deflate(
+        std::make_unique<qat::IOBuffers>(
+            std::make_unique<qat::BufferListUser>(
+                qat->input_buffer,
+                qat->pending_in_count,
+                metadata_size
+            ),
+            std::make_unique<qat::BufferListUser>(
+                qat->output_buffer,
+                qat->output_buffer_size,
+                metadata_size
+            )
+        ), (flush == Z_FINISH && s->strm->avail_in == 0)
+    );
+
+    job->WaitCompletion();
+    const auto *results = job->GetResults();
+
+    /* TODO(gustavoa): make QAT perform the checksum combine. */
+    if (s->wrap == 2) {
+        s->strm->adler = crc32_combine(
+            s->strm->adler,
+            results->checksum,
+            results->consumed
+        );
+    } else if (s->wrap == 1) {
+        s->strm->adler = adler32(
+            s->strm->adler,
+            qat->input_buffer,
+            results->consumed
+        );
+    }
+
+    qat->pending_out_count = results->produced;
+    qat->pending_in_count -= results->consumed;
+
+    if (qat->pending_in_count != 0) {
+        /* Slide the unconsumed tail to the front. The two ranges overlap when
+         * the tail is longer than the consumed prefix, which a memcpy-style
+         * copy does not permit, so use memmove. */
+        std::memmove(qat->input_buffer, qat->input_buffer + results->consumed, qat->pending_in_count);
+    }
+
+    return 0;
+}
+
+qat_block_state qat_deflate_step(deflate_state* s, int flush)
+{   /* Runs one deflate() step on the QAT path: stage input, compress, emit output. */
+    z_streamp strm = s->strm;
+    struct qat_deflate* qat_state = s->qat_s;
+    /* Each pass tops up the staging buffer (if not full), compresses it and drains output. */
+    for (;;) {
+        if (qat_state->pending_in_count < qat_state->input_buffer_size) {
+            qat_read_buf(
+                strm,
+                qat_state,
+                qat_state->input_buffer_size - qat_state->pending_in_count
+            );
+            if (qat_state->pending_in_count < qat_state->input_buffer_size && flush == Z_NO_FLUSH) {
+                return qat_block_need_more;  /* buffer not full and no flush requested */
+            } else {
+                qat_compress_pending(s, flush);
+            }
+            if (strm->avail_in == 0) {
+                break;
+            }
+        } else {
+            qat_compress_pending(s, flush);
+        }
+        /* More caller input remains: hand over output and stop if the caller's buffer is full. */
+        qat_flush_pending(s);
+        if (strm->avail_out == 0) {
+            return (flush == Z_FINISH) ? qat_block_finish_started : qat_block_need_more;
+        }
+    }
+    /* Reached only via the break above: avail_in is 0 and the staged data was submitted. */
+    if (flush == Z_FINISH) {
+        qat_flush_pending(s);
+        if (strm->avail_out == 0) {
+            return qat_block_finish_started;
+        } else {
+            return qat_block_finish_done;
+        }
+    }
+    /* Any other flush mode: emit what fits and report whether the output space ran out. */
+    qat_flush_pending(s);
+    if (strm->avail_out == 0) {
+        return qat_block_done;
+    }
+
+    return qat_block_need_more;
+}
+
+int qat_deflate_state_free(deflate_state* s)
+{
+    struct qat_deflate* qat_state = s->qat_s;
+    if (qat_state->input_buffer) {
+        qat::Free(qat_state->input_buffer);
+    }
+    if (qat_state->output_buffer) {
+        qat::Free(qat_state->output_buffer);
+    }
+
+    qat_state->qat_session.reset();
+    delete qat_state;
+    s->qat_s = nullptr;
+
+    return Z_OK;
+}
+
+/* Clones the QAT state of `ss` (fresh session, private buffer copies). Returns
+ * nullptr if `ss` has no QAT state or if a buffer allocation fails.
+ * NOTE(review): callers cannot distinguish those two cases -- confirm. */
+struct qat_deflate *qat_deflate_copy(deflate_state *ss)
+{
+    struct qat_deflate *sqat = ss->qat_s;
+    if (!sqat) {
+        return nullptr;
+    }
+
+    struct qat_deflate *dqat = new struct qat_deflate;
+    dqat->qat_session = qat_create_session(ss->level, ss->wrap);
+    dqat->input_buffer_size = sqat->input_buffer_size;
+    dqat->input_buffer = qat::AllocBlockArray<uint8_t>(dqat->input_buffer_size, 0);
+    dqat->output_buffer_size = sqat->output_buffer_size;
+    dqat->output_buffer = qat::AllocBlockArray<uint8_t>(dqat->output_buffer_size, 0);
+    if (!dqat->input_buffer || !dqat->output_buffer) {
+        if (dqat->input_buffer) qat::Free(dqat->input_buffer);
+        if (dqat->output_buffer) qat::Free(dqat->output_buffer);
+        delete dqat;
+        return nullptr;
+    }
+
+    dqat->pending_in_count = sqat->pending_in_count;
+    dqat->pending_out_count = sqat->pending_out_count;
+    dqat->pending_out = dqat->output_buffer + (sqat->pending_out - sqat->output_buffer);
+    zmemcpy(dqat->input_buffer, sqat->input_buffer, dqat->input_buffer_size);
+    zmemcpy(dqat->output_buffer, sqat->output_buffer, dqat->output_buffer_size);
+    return dqat;
+}
+
diff --git a/contrib/qat/deflate_qat.h b/contrib/qat/deflate_qat.h
new file mode 100644
index 0000000..3c7aa11
--- /dev/null
+++ b/contrib/qat/deflate_qat.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef DEFLATE_QAT_H
+#define DEFLATE_QAT_H
+
+#include "deflate.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is a 1:1 mapping of the block states that deflate_fast, deflate_slow,
+ * deflate_rle, etc. return.
+ * The added 'qat_failure' value is used for signaling the caller to fall
+ * back to software mode.
+ */
+typedef enum {
+    qat_block_need_more,
+    qat_block_done,
+    qat_block_finish_started,
+    qat_block_finish_done,
+    qat_failure
+} qat_block_state;
+
+/* Initialize QAT for this process (no-op if already done). Z_OK on success, else Z_ERRNO. */
+int qat_deflate_init(void);
+
+/* Initialize a QAT stream state for a deflate_state object; NULL when unavailable. */
+struct qat_deflate *qat_deflate_state_init(int level, int wrap);
+
+/* Flush QAT output buffer into the zstream.next_out pointer. */
+void qat_flush_pending(deflate_state*);
+
+/* Compresses/copies/flushes any data in the internal QAT state
+ * input/output buffers.
+*/
+qat_block_state qat_deflate_step(deflate_state*, int flush);
+
+/* Frees all the QAT-related buffers and objects for a given deflate_state. */
+int qat_deflate_state_free(deflate_state*);
+
+struct qat_deflate *qat_deflate_copy(deflate_state *ss);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/io_buffers.cpp b/contrib/qat/qatzpp/io_buffers.cpp
new file mode 100644
index 0000000..2870292
--- /dev/null
+++ b/contrib/qat/qatzpp/io_buffers.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include <fstream>
+#include <iostream>
+
+#include "io_buffers.h"
+#include "qat_instance.hpp"
+
+namespace qat
+{
+
+IOBuffers::IOBuffers()
+{
+}
+
+IOBuffers::IOBuffers(std::unique_ptr<BaseBufferList>&& src_list, std::unique_ptr<BaseBufferList>&& dst_list):
+    src_buffer_list_(std::move(src_list)), dst_buffer_list_(std::move(dst_list))
+{
+}
+
+IOBuffers::~IOBuffers()
+{
+}
+
+}
diff --git a/contrib/qat/qatzpp/io_buffers.h b/contrib/qat/qatzpp/io_buffers.h
new file mode 100644
index 0000000..9fe8bfd
--- /dev/null
+++ b/contrib/qat/qatzpp/io_buffers.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_IO_BUFFERS_H
+#define QATZPP_IO_BUFFERS_H
+
+#include <qat/cpa_dc.h>
+
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "memory.hpp"
+#include "qat_instance.hpp"
+
+namespace qat
+{
+
+struct BaseBufferList
+{
+    virtual ~BaseBufferList() {}
+
+    CpaBufferList list;
+    std::vector<CpaFlatBuffer> flat_buffers;
+
+protected:
+    BaseBufferList() {}
+};
+
+class IOBuffers
+{
+public:
+    IOBuffers(
+        std::unique_ptr<BaseBufferList> &&src_list,
+        std::unique_ptr<BaseBufferList> &&dst_list
+    );
+    virtual ~IOBuffers();
+
+    BaseBufferList *GetSrc() const {
+        return src_buffer_list_.get();
+    }
+
+    BaseBufferList *GetDst() const {
+        return dst_buffer_list_.get();
+    }
+protected:
+    IOBuffers();
+
+    std::unique_ptr<BaseBufferList> src_buffer_list_;
+    std::unique_ptr<BaseBufferList> dst_buffer_list_;
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/memory.cpp b/contrib/qat/qatzpp/memory.cpp
new file mode 100644
index 0000000..6a97ffe
--- /dev/null
+++ b/contrib/qat/qatzpp/memory.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include <qat/qae_mem.h>
+
+#include <cstdlib>
+#include <iostream>
+
+#include "memory.hpp"
+#include "qat.hpp"
+
+namespace qat
+{
+
+void *Alloc(size_t size_bytes, uint32_t numa_node)
+{
+    return qaeMemAllocNUMA(size_bytes, numa_node, 1);
+}
+
+void Free(void *ptr)
+{
+    qaeMemFreeNUMA(&ptr);
+}
+
+}
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/memory.hpp b/contrib/qat/qatzpp/memory.hpp
new file mode 100644
index 0000000..191516c
--- /dev/null
+++ b/contrib/qat/qatzpp/memory.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_MEMORY_HPP
+#define QATZPP_MEMORY_HPP
+
+#include <cstddef>
+#include <cstdint>
+
+namespace qat
+{
+
+void *Alloc(size_t sizeBytes, uint32_t numa_node);
+
+template <typename T>
+T *AllocBlock(int32_t numa_node)
+{
+    return static_cast<T*>(Alloc(sizeof(T), numa_node));
+}
+
+// Allocates `count` T-sized elements via Alloc(); nullptr if count is 0 or the byte size would overflow.
+template <typename T>
+T *AllocBlockArray(size_t count, int32_t numa_node)
+{
+    if (count == 0 || count > static_cast<size_t>(-1) / sizeof(T)) {
+        return nullptr;
+    }
+    return static_cast<T*>(Alloc(sizeof(T) * count, numa_node));
+}
+
+void Free(void *ptr);
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/qat.cpp b/contrib/qat/qatzpp/qat.cpp
new file mode 100644
index 0000000..80468d3
--- /dev/null
+++ b/contrib/qat/qatzpp/qat.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "qat.hpp"
+
+#include <qat/cpa.h>
+#include <qat/icp_sal_user.h>
+#include <qat/qae_mem.h>
+
+#include <iostream>
+#include <string>
+#include <memory>
+#include <mutex>
+
+namespace qat
+{
+
+static bool g_qat_not_available = false;
+static bool g_qat_initialized = false;
+static std::mutex g_qat_initialization_mutex;
+
+class QATContext
+{
+public:
+    explicit QATContext() {}
+
+    QATContext(const QATContext &) = delete;
+    QATContext &operator=(const QATContext &) = delete;
+
+    QATContext(QATContext &&) = delete;
+    QATContext &operator=(QATContext &&) = delete;
+
+    ~QATContext()
+    {
+        std::lock_guard<std::mutex> lock(g_qat_initialization_mutex);
+
+        if (g_qat_not_available) return;
+
+        if (g_qat_initialized) {
+            icp_sal_userStop();
+            g_qat_initialized = false;
+        }
+    }
+};
+
+static std::unique_ptr<QATContext> qat_context;
+
+int Initialize()
+{
+    std::lock_guard<std::mutex> lock(g_qat_initialization_mutex);
+    uint32_t cpa_state;
+    if (g_qat_not_available) {
+        return CPA_STATUS_FAIL;
+    }
+    if (g_qat_initialized) {
+        return CPA_STATUS_SUCCESS;
+    }
+
+    cpa_state = icp_sal_userStartMultiProcess("SSL", CPA_FALSE);
+
+    g_qat_not_available = (cpa_state != CPA_STATUS_SUCCESS);
+    g_qat_initialized = (cpa_state == CPA_STATUS_SUCCESS);
+
+    qat_context = std::make_unique<QATContext>();
+    return cpa_state;
+}
+
+}
diff --git a/contrib/qat/qatzpp/qat.hpp b/contrib/qat/qatzpp/qat.hpp
new file mode 100644
index 0000000..8ee7746
--- /dev/null
+++ b/contrib/qat/qatzpp/qat.hpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_QAT_HPP
+#define QATZPP_QAT_HPP
+
+namespace qat
+{
+
+int Initialize();
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/qat_buffer_list.cpp b/contrib/qat/qatzpp/qat_buffer_list.cpp
new file mode 100644
index 0000000..f0eea49
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_buffer_list.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "qat_buffer_list.hpp"
+
+namespace qat
+{
+
+// Describes one caller-owned buffer as a single-entry CpaBufferList; only the
+// private metadata block is allocated here (released by the destructor).
+BufferListUser::BufferListUser(uint8_t *data, size_t size, size_t metadata_size)
+{
+    list = CpaBufferList{};  // zero every field; the base leaves `list` uninitialized
+    flat_buffers = std::vector<CpaFlatBuffer>(1);
+    flat_buffers[0].pData = data;
+    flat_buffers[0].dataLenInBytes = size;
+    list.pPrivateMetaData = AllocBlockArray<uint8_t>(metadata_size, 0);
+    list.numBuffers = 1;
+    list.pBuffers = flat_buffers.data();
+}
+
+BufferListUser::~BufferListUser()
+{
+    if (list.pPrivateMetaData) {
+        Free(list.pPrivateMetaData);
+    }
+}
+
+}
diff --git a/contrib/qat/qatzpp/qat_buffer_list.hpp b/contrib/qat/qatzpp/qat_buffer_list.hpp
new file mode 100644
index 0000000..2a28175
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_buffer_list.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_QAT_BUFFER_LIST_HPP
+#define QATZPP_QAT_BUFFER_LIST_HPP
+
+#include <qat/cpa.h>
+
+#include "io_buffers.h"
+
+namespace qat
+{
+
+struct BufferListUser final : public BaseBufferList
+{
+    BufferListUser(
+        uint8_t *data,
+        size_t size,
+        size_t metadata_size
+    );
+
+    ~BufferListUser() override;
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/qat_instance.cpp b/contrib/qat/qatzpp/qat_instance.cpp
new file mode 100644
index 0000000..5b833c2
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_instance.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include <qat/qae_mem.h>
+
+#include <iostream>
+#include <vector>
+
+#include "memory.hpp"
+#include "qat_instance.hpp"
+#include "session.hpp"
+
+#define MAX_SAMPLE_BUFFER_SIZE  (4*1024*1024)
+
+namespace qat
+{
+
+static std::mutex g_instance_mutex;
+static std::vector<std::shared_ptr<Instance>> instances;
+
+static CpaPhysicalAddr virt2Phys(void *virt_addr)
+{
+    return (CpaPhysicalAddr)qaeVirtToPhysNUMA(virt_addr);
+}
+
+Instance::Instance(CpaInstanceHandle instance):
+    instance_(instance),
+    num_intermediate_buffer_lists_(0),
+    intermediate_buffer_array_(nullptr),
+    started_(false)
+{
+    CpaDcInstanceCapabilities caps{};
+    cpaDcQueryCapabilities(instance_, &caps);
+
+    if (!caps.statelessDeflateCompression || !caps.statelessDeflateDecompression ||
+        !caps.checksumAdler32 || !caps.dynamicHuffman)
+    {
+        return;
+    }
+
+    if (caps.dynamicHuffmanBufferReq) {
+        uint32_t buffer_metadata_size;
+        cpaDcBufferListGetMetaSize(instance_, 1, &buffer_metadata_size);
+        cpaDcGetNumIntermediateBuffers(instance_, &num_intermediate_buffer_lists_);
+
+        if(num_intermediate_buffer_lists_) {
+            intermediate_buffer_array_ = AllocBlockArray<CpaBufferList*>(num_intermediate_buffer_lists_, 0);
+        }
+        for (int i = 0; i < num_intermediate_buffer_lists_; ++i) {
+            intermediate_buffer_array_[i] = AllocBlock<CpaBufferList>(0);
+            intermediate_buffer_array_[i]->pPrivateMetaData =
+                                                    AllocBlockArray<uint8_t>(buffer_metadata_size, 0);
+            intermediate_buffer_array_[i]->pBuffers = AllocBlock<CpaFlatBuffer>(0);
+            intermediate_buffer_array_[i]->pBuffers->pData =
+                                                    AllocBlockArray<uint8_t>(MAX_SAMPLE_BUFFER_SIZE, 0);
+            intermediate_buffer_array_[i]->pBuffers->dataLenInBytes = MAX_SAMPLE_BUFFER_SIZE;
+        }
+    }
+
+    cpaDcSetAddressTranslation(instance_, virt2Phys);
+}
+
+Instance::~Instance()
+{
+}
+
+CpaDcInstanceCapabilities Instance::GetCapabilities()
+{
+    CpaDcInstanceCapabilities caps{};
+    cpaDcQueryCapabilities(instance_, &caps);
+
+    return caps;
+}
+
+CpaInstanceInfo2 Instance::GetInfo()
+{
+    CpaInstanceInfo2 info{};
+    cpaDcInstanceGetInfo2(instance_, &info);
+
+    return info;
+}
+
+int Instance::Start()
+{
+    std::lock_guard<std::mutex> lock(mutex_);
+
+    if (started_) {
+        return 0;
+    }
+
+    int ret = cpaDcStartInstance
+    (
+        instance_,
+        num_intermediate_buffer_lists_,
+        intermediate_buffer_array_
+    );
+    if (ret) {
+        return -1;
+    }
+    started_ = true;
+    return 0;
+}
+
+std::vector<std::shared_ptr<Instance>> Instance::Create()
+{
+    std::lock_guard<std::mutex> lock(g_instance_mutex);
+    uint16_t num_instances = 0;
+
+    if (!instances.empty()) {
+        return instances;
+    }
+
+    cpaDcGetNumInstances(&num_instances);
+
+    if (!num_instances) {
+        std::cerr << "No instances found\n";
+        return {};
+    }
+
+    std::vector<CpaInstanceHandle> handles(num_instances);
+    cpaDcGetInstances(num_instances, handles.data());
+
+    for(auto& handle: handles) {
+        instances.emplace_back(std::make_shared<Instance>(handle));
+    }
+
+    return instances;
+}
+
+}
diff --git a/contrib/qat/qatzpp/qat_instance.hpp b/contrib/qat/qatzpp/qat_instance.hpp
new file mode 100644
index 0000000..1a2b4af
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_instance.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_QAT_INSTANCE_HPP
+#define QATZPP_QAT_INSTANCE_HPP
+
+#include <qat/cpa_dc.h>
+
+#include <memory>
+#include <mutex>
+#include <vector>
+
+namespace qat
+{
+
+class Instance
+{
+public:
+    Instance(CpaInstanceHandle);
+    ~Instance();
+
+    CpaInstanceHandle GetHandle() { return instance_; }
+    CpaDcInstanceCapabilities GetCapabilities();
+    CpaInstanceInfo2 GetInfo();
+
+    int Start(void);
+    static std::vector<std::shared_ptr<Instance>> Create();
+private:
+
+    CpaInstanceHandle instance_;
+    uint16_t num_intermediate_buffer_lists_;
+    CpaBufferList **intermediate_buffer_array_;
+    bool started_;
+
+    std::mutex mutex_;
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/qat_task.cpp b/contrib/qat/qatzpp/qat_task.cpp
new file mode 100644
index 0000000..a53ea94
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_task.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include <qat/cpa.h>
+#include <qat/icp_sal_poll.h>
+
+#include "qat_task.hpp"
+
+namespace qat
+{
+
+// Takes ownership of the buffers and result slot of one request. The task starts
+// out not completed; dc_callback() flags it done. Initializers follow declaration order.
+QATTask::QATTask(std::shared_ptr<Instance> &qat_instance,
+    std::unique_ptr<IOBuffers> &&buffers,
+    std::unique_ptr<CpaDcRqResults> &&dc_results):
+    completed_(false), qat_instance_(qat_instance),
+    dc_results_(std::move(dc_results)), io_buffers_(std::move(buffers))
+{
+}
+
+// Polls the instance until dc_callback() flags this task completed; returns at once if it already is.
+void QATTask::WaitCompletion()
+{
+    for (;;) {
+        if (completed_) {
+            return;
+        }
+        icp_sal_DcPollInstance(qat_instance_->GetHandle(), 0);
+    }
+}
+
+IOBuffers *QATTask::GetBuffers()
+{
+    return io_buffers_.get();
+}
+
+CpaDcRqResults *QATTask::GetResults()
+{
+    return dc_results_.get();
+}
+
+void dc_callback(void *callback_tag, CpaStatus status)
+{
+    if (!callback_tag) {
+        return;
+    }
+    // Ugly and dangerous
+    QATTask* task = static_cast<QATTask*>(callback_tag);
+    task->completed_ = true;
+}
+
+}
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/qat_task.hpp b/contrib/qat/qatzpp/qat_task.hpp
new file mode 100644
index 0000000..3950502
--- /dev/null
+++ b/contrib/qat/qatzpp/qat_task.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_WORK_HPP
+#define QATZPP_WORK_HPP
+
+#include <qat/cpa.h>
+
+#include <memory>
+
+#include "io_buffers.h"
+
+namespace qat
+{
+
+class QATTask
+{
+public:
+    explicit QATTask(std::shared_ptr<Instance> &qat_instance,
+                std::unique_ptr<IOBuffers> &&,
+                std::unique_ptr<CpaDcRqResults> &&dc_results);
+
+    QATTask(QATTask &&) = delete;
+    QATTask& operator=(QATTask &&) = delete;
+
+    QATTask(const QATTask &) = delete;
+    QATTask &operator=(const QATTask &) = delete;
+
+    void WaitCompletion();
+
+    IOBuffers *GetBuffers();
+    CpaDcRqResults *GetResults();
+
+private:
+    bool completed_;
+
+    std::shared_ptr<Instance> qat_instance_;
+
+    std::unique_ptr<CpaDcRqResults> dc_results_;
+    std::unique_ptr<IOBuffers> io_buffers_;
+
+    friend void dc_callback(void *, CpaStatus);
+};
+
+void dc_callback(void*, CpaStatus);
+
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/qat/qatzpp/session.cpp b/contrib/qat/qatzpp/session.cpp
new file mode 100644
index 0000000..b4cefb3
--- /dev/null
+++ b/contrib/qat/qatzpp/session.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include <iostream>
+#include <semaphore.h>
+
+#include "memory.hpp"
+#include "session.hpp"
+
+namespace qat
+{
+
+constexpr CpaDcHuffType kHuffType = CPA_DC_HT_FULL_DYNAMIC;
+
+DeflateSession::DeflateSession(
+    std::shared_ptr<Instance> &qat_instance,
+    CpaDcCompLvl comp_level, CpaDcChecksum checksum,
+    uint32_t numa_node):
+    qat_instance_(qat_instance), session_(nullptr)
+{
+    // Creates a stateless, combined-direction DEFLATE session; on any failure session_ stays null.
+    uint32_t session_size = 0;
+    uint32_t ctx_size = 0;  // filled by the size query; no context buffer is allocated
+
+    CpaDcSessionSetupData sd{};
+    sd.compLevel = comp_level;
+    sd.compType = CPA_DC_DEFLATE;
+    sd.huffType = kHuffType;
+    sd.autoSelectBestHuffmanTree = CPA_DC_ASB_UNCOMP_STATIC_DYNAMIC_WITH_STORED_HDRS;
+    sd.sessDirection = CPA_DC_DIR_COMBINED;
+    sd.sessState = CPA_DC_STATELESS;
+    sd.checksum = checksum;
+
+    const auto handle = qat_instance_->GetHandle();
+    if (cpaDcGetSessionSize(handle, &sd, &session_size, &ctx_size) != CPA_STATUS_SUCCESS) {
+        return;  // size unknown: do not allocate or initialise anything
+    }
+    session_ = AllocBlockArray<uint8_t>(session_size, numa_node);
+    // No context is passed for stateless operation; completions are delivered to dc_callback.
+    if (session_ && cpaDcInitSession(handle, session_, &sd, nullptr, &dc_callback) != CPA_STATUS_SUCCESS) {
+        Free(session_);  // never initialised, so it must not reach cpaDcRemoveSession
+        session_ = nullptr;
+    }
+}
+
+DeflateSession::~DeflateSession()
+{
+    if (!session_) {
+        return;  // session_ is already null: nothing to remove or free
+    }
+    cpaDcRemoveSession(qat_instance_->GetHandle(), session_);  // detach before freeing
+    Free(session_);
+    session_ = nullptr;
+}
+
+std::unique_ptr<QATTask> DeflateSession::Deflate(
+    std::unique_ptr<IOBuffers> &&buffers,
+    bool flush_final)
+{
+    // Submits one compression request over `buffers`. The returned task holds the
+    // buffers and the result record, and its address is the tag given to dc_callback.
+    CpaDcOpData op_data{};
+    op_data.flushFlag = (flush_final) ?
+        CPA_DC_FLUSH_FINAL : CPA_DC_FLUSH_FULL;
+    op_data.compressAndVerify = CPA_TRUE;
+    op_data.inputSkipData.skipMode = CPA_DC_SKIP_DISABLED;
+    op_data.outputSkipData.skipMode = CPA_DC_SKIP_DISABLED;
+
+    auto task = std::make_unique<QATTask>(
+        qat_instance_, std::move(buffers), std::make_unique<CpaDcRqResults>());
+
+    const CpaStatus status = cpaDcCompressData2(
+        qat_instance_->GetHandle(), session_,
+        &task->GetBuffers()->GetSrc()->list, &task->GetBuffers()->GetDst()->list,
+        &op_data, task->GetResults(), static_cast<void*>(task.get()));
+    if (status != CPA_STATUS_SUCCESS) {
+        // Nothing was queued, so no callback will arrive: record the failure and
+        // complete the task here so that a waiter is not left pending.
+        task->GetResults()->status = CPA_DC_FATALERR;
+        dc_callback(task.get(), status);
+    }
+    return task;  // plain return: std::move on a local would only block copy elision
+}
+
+std::unique_ptr<QATTask> DeflateSession::Inflate(std::unique_ptr<IOBuffers> &&buffers)
+{
+    // Submits one decompression request over `buffers`. The returned task holds the
+    // buffers and the result record, and its address is the tag given to dc_callback.
+    CpaDcOpData op_data = {};
+    op_data.flushFlag = CPA_DC_FLUSH_FINAL;
+    op_data.compressAndVerify = CPA_TRUE;
+    op_data.inputSkipData.skipMode = CPA_DC_SKIP_DISABLED;
+    op_data.outputSkipData.skipMode = CPA_DC_SKIP_DISABLED;
+
+    auto task = std::make_unique<QATTask>(
+        qat_instance_, std::move(buffers), std::make_unique<CpaDcRqResults>());
+
+    const CpaStatus status = cpaDcDecompressData2(
+        qat_instance_->GetHandle(), session_,
+        &task->GetBuffers()->GetSrc()->list, &task->GetBuffers()->GetDst()->list,
+        &op_data, task->GetResults(), static_cast<void*>(task.get()));
+    if (status != CPA_STATUS_SUCCESS) {
+        // Nothing was queued, so no callback will arrive: record the failure and
+        // complete the task here so that a waiter is not left pending.
+        task->GetResults()->status = CPA_DC_FATALERR;
+        dc_callback(task.get(), status);
+    }
+    return task;  // plain return: std::move on a local would only block copy elision
+}
+
+uint32_t DeflateSession::GetDeflateBound(uint32_t input_size)
+{
+    // Asks the driver for the output bound of `input_size` bytes with kHuffType.
+    // The call's status is not checked; `bound` starts at 0 and is returned as
+    // the driver left it.
+    uint32_t bound = 0;
+    const auto handle = qat_instance_->GetHandle();
+
+    cpaDcDeflateCompressBound(handle, kHuffType, input_size, &bound);
+
+    return bound;
+}
+
+}
diff --git a/contrib/qat/qatzpp/session.hpp b/contrib/qat/qatzpp/session.hpp
new file mode 100644
index 0000000..c8af47c
--- /dev/null
+++ b/contrib/qat/qatzpp/session.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2024 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Gustavo A Espinoza   <gustavo.adolfo.espinoza.quintero@intel.com>
+ *                       <gustavoaespinozaq@hotmail.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef QATZPP_SESSION_HPP
+#define QATZPP_SESSION_HPP
+
+#include <qat/cpa.h>
+#include <qat/cpa_dc.h>
+
+#include <memory>
+
+#include "io_buffers.h"
+#include "qat_task.hpp"
+
+namespace qat
+{
+
+class DeflateSession  // One QAT DEFLATE session bound to a shared Instance.
+{
+public:
+    DeflateSession(std::shared_ptr<Instance> &, CpaDcCompLvl,
+                   CpaDcChecksum, uint32_t numa_node);
+    ~DeflateSession();  // removes the session and frees session_
+    // session_ is a raw owning handle: a copy would remove and free it twice.
+    DeflateSession(const DeflateSession &) = delete;
+    DeflateSession &operator=(const DeflateSession &) = delete;
+
+    std::unique_ptr<QATTask> Deflate(std::unique_ptr<IOBuffers> &&buffers, bool flush_final);
+    std::unique_ptr<QATTask> Inflate(std::unique_ptr<IOBuffers> &&buffers);
+    uint32_t GetDeflateBound(uint32_t input_size);
+    std::shared_ptr<Instance> getInstance() { return qat_instance_; }
+
+private:
+    std::shared_ptr<Instance> qat_instance_;
+    CpaDcSessionHandle session_ = nullptr;  // null until the constructor assigns it
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/crc32.c b/crc32.c
index 4177e92..204aa1a 100644
--- a/crc32.c
+++ b/crc32.c
@@ -1168,6 +1168,11 @@
 
 ZLIB_INTERNAL void crc_finalize(deflate_state *const s)
 {
+#ifdef QAT_COMPRESSION_ENABLED
+    if (s->qat_s) {     /* QAT context present: leave strm->adler untouched */
+        return;         /* NOTE(review): assumes the QAT path maintains the checksum -- confirm */
+    }
+#endif
 #ifdef CRC32_SIMD_SSE42_PCLMUL
     if (x86_cpu_enable_simd)
         s->strm->adler = crc_fold_512to32(s);
diff --git a/deflate.c b/deflate.c
index b9a3120..8a5281c 100644
--- a/deflate.c
+++ b/deflate.c
@@ -57,6 +57,10 @@
 #include "slide_hash_simd.h"
 #endif
 
+#if defined(QAT_COMPRESSION_ENABLED)
+#include "contrib/qat/deflate_qat.h"
+#endif
+
 #include "contrib/optimizations/insert_string.h"
 
 #ifdef FASTEST
@@ -564,6 +568,13 @@
     s->strategy = strategy;
     s->method = (Byte)method;
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    s->qat_s = NULL;    /* NULL: no QAT context for this stream */
+    if (s->level && qat_deflate_init() == Z_OK) {   /* skipped for level 0 or when QAT init fails */
+        s->qat_s = qat_deflate_state_init(s->level, s->wrap);
+    }
+#endif
+
     return deflateReset(strm);
 }
 
@@ -962,6 +973,12 @@
     unsigned len;
     deflate_state *s = strm->state;
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    if (s->qat_s) {     /* a QAT context exists for this stream */
+        qat_flush_pending(s);   /* runs before the regular pending-buffer flush below */
+    }
+#endif
+
     _tr_flush_bits(s);
     len = s->pending;
     if (len > strm->avail_out) len = strm->avail_out;
@@ -1315,6 +1332,12 @@
     TRY_FREE(strm, strm->state->prev);
     TRY_FREE(strm, strm->state->window);
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    if (strm->state->qat_s) {
+        qat_deflate_state_free(strm->state);    /* takes the state, so it must precede the ZFREE below */
+    }
+#endif
+
     ZFREE(strm, strm->state);
     strm->state = Z_NULL;
 
@@ -1389,6 +1412,14 @@
     ds->d_desc.dyn_tree = ds->dyn_dtree;
     ds->bl_desc.dyn_tree = ds->bl_tree;
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    /* dest needs its own QAT context; on failure free what was built for it. */
+    if (ss->qat_s && (ds->qat_s = qat_deflate_copy(ss)) == NULL) {
+        deflateEnd(dest);   /* ds->qat_s is NULL here, so no QAT state is freed */
+        return Z_MEM_ERROR;
+    }
+#endif
+
     return Z_OK;
 #endif /* MAXSEG_64K */
 }
@@ -1880,6 +1911,24 @@
     IPos hash_head;       /* head of the hash chain */
     int bflush;           /* set if current block must be flushed */
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    if (s->qat_s) {     /* QAT context present: let qat_deflate_step handle this call */
+        qat_block_state qat_block = qat_deflate_step(s, flush);
+        switch (qat_block) {    /* translate the QAT result into a block_state */
+        case qat_block_need_more:
+            return need_more;
+        case qat_block_done:
+            return block_done;
+        case qat_block_finish_started:
+            return finish_started;
+        case qat_block_finish_done:
+            return finish_done;
+        case qat_failure:
+            break;      /* leave the switch and run the software loop below */
+        }
+    }
+#endif
+
     for (;;) {
         /* Make sure that we always have enough lookahead, except
          * at the end of the input file. We need MAX_MATCH bytes
@@ -1982,6 +2031,24 @@
     IPos hash_head;          /* head of hash chain */
     int bflush;              /* set if current block must be flushed */
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    if (s->qat_s) {     /* QAT context present: let qat_deflate_step handle this call */
+        qat_block_state qat_block = qat_deflate_step(s, flush);
+        switch (qat_block) {    /* translate the QAT result into a block_state */
+        case qat_block_need_more:
+            return need_more;
+        case qat_block_done:
+            return block_done;
+        case qat_block_finish_started:
+            return finish_started;
+        case qat_block_finish_done:
+            return finish_done;
+        case qat_failure:
+            break;      /* leave the switch and run the software loop below */
+        }
+    }
+#endif
+
     /* Process the input block. */
     for (;;) {
         /* Make sure that we always have enough lookahead, except
diff --git a/deflate.h b/deflate.h
index eb7f072..099d359 100644
--- a/deflate.h
+++ b/deflate.h
@@ -282,6 +282,13 @@
      * hash is enabled.
      */
 
+#if defined(QAT_COMPRESSION_ENABLED)
+    /* Current state of the QAT stream attached to this deflate stream,
+     * or NULL when no QAT context is in use.
+     */
+    struct qat_deflate *qat_s;
+#endif
+
 } FAR deflate_state;
 
 /* Output a byte on the stream.