Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ If you want to use avx512 to accelerate CRC calculation.
cmake -D ENABLE_ISAL=1 ..
```

If you want to use QAT to accelerate compression/decompression.
If you want to use QAT to accelerate compression/decompression. However, currently only the decompression part has been integrated. Since LZ4 is already a highly efficient compression algorithm, our tests show that QAT can only outperform the CPU at a 4KB block size and a batch size of 256 when the compression ratio significantly exceeds a threshold.

```bash
cmake -D ENABLE_QAT=1 ..
Expand Down
26 changes: 23 additions & 3 deletions src/overlaybd/zfile/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
file(GLOB SOURCE_ZFILE "*.cpp")
file(GLOB SOURCE_LZ4 "lz4/*.c")
file(GLOB SOURCE_LZ4 "lz4/*.c" "lz4/*.cpp")
file(GLOB SOURCE_CRC32 "crc32/crc32c.cpp")

set (CMAKE_CXX_STANDARD 17)
Expand Down Expand Up @@ -38,13 +38,33 @@ if(ENABLE_DSA OR ENABLE_ISAL)
endif()
set (CMAKE_CXX_STANDARD 14)

# ---- QAT auto-detection ----
# Only probe for QAT when the user asked for it. All three pieces (the cpa.h
# header, the qat library and the usdm library) must be found; if any one is
# missing, emit a warning and switch ENABLE_QAT off in the current scope.
if(ENABLE_QAT)
    find_library(QAT_LIBRARY NAMES qat)
    find_library(USDM_LIBRARY NAMES usdm)
    find_path(QAT_INCLUDE_DIR
        NAMES qat/cpa.h
        PATHS /usr/include/qat /usr/local/include/qat)

    if(NOT QAT_INCLUDE_DIR OR NOT QAT_LIBRARY OR NOT USDM_LIBRARY)
        message(WARNING "ENABLE_QAT=ON but QAT headers/libs not found; disabling QAT")
        set(ENABLE_QAT OFF)
    else()
        message(STATUS "QAT acceleration: ENABLED (include=${QAT_INCLUDE_DIR})")
    endif()
endif()

if(NOT ENABLE_QAT)
    # Without QAT, drop lz4-qat.cpp from the source list so the build never
    # needs the QAT headers.
    list(REMOVE_ITEM SOURCE_LZ4 "${CMAKE_CURRENT_SOURCE_DIR}/lz4/lz4-qat.cpp")
    message(STATUS "QAT acceleration: DISABLED")
endif()
# --------------------------------

add_library(zfile_lib STATIC ${SOURCE_ZFILE} ${SOURCE_LZ4})
target_link_libraries(zfile_lib photon_static crc32_lib ${LIBZSTD})

if (ENABLE_QAT)
target_compile_definitions(zfile_lib PUBLIC -DENABLE_QAT)
target_link_libraries(zfile_lib -lpthread -lpci)
#target_link_libraries(zfile_lib -lqat_s -lusdm_drv_s -lpthread -lpci)
target_include_directories(zfile_lib PUBLIC ${QAT_INCLUDE_DIR})
target_link_libraries(zfile_lib ${QAT_LIBRARY} ${USDM_LIBRARY} -lpci -lpthread)
endif()

if (BUILD_TESTING)
Expand Down
66 changes: 43 additions & 23 deletions src/overlaybd/zfile/compressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#ifdef ENABLE_QAT
#include "lz4/lz4-qat.h"
#include <atomic>
extern "C" {
#include <pci/pci.h>
}
Expand All @@ -37,7 +38,11 @@ namespace ZFile {

#define QAT_VENDOR_ID 0x8086
#define QAT_DEVICE_ID 0x4940

#ifdef ENABLE_QAT
/* 0 = unprobed; 1 = available; 2 = unavailable. Cached process-wide so repeat
* LZ4Compressor::init calls skip PCI scan + qat_init when QAT is absent. */
static std::atomic<int> g_qat_state{0};
#endif
class BaseCompressor : public ICompressor {
public:
uint32_t max_dst_size = 0;
Expand Down Expand Up @@ -147,21 +152,28 @@ class LZ4Compressor : public BaseCompressor {

bool check_qat() {
#ifdef ENABLE_QAT
struct pci_access *pacc;
struct pci_dev *dev;
pacc = pci_alloc();
int cached = g_qat_state.load(std::memory_order_acquire);
if (cached == 1) return true;
if (cached == 2) return false;

struct pci_access *pacc = pci_alloc();
if (!pacc) {
g_qat_state.store(2, std::memory_order_release);
return false;
}
pci_init(pacc);
pci_scan_bus(pacc);
for (dev = pacc->devices; dev; dev = dev->next) {
bool found = false;
for (struct pci_dev *dev = pacc->devices; dev; dev = dev->next) {
pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES);
if (dev->vendor_id == QAT_VENDOR_ID && dev->device_id == QAT_DEVICE_ID) {
pci_cleanup(pacc);
return true;
found = true;
break;
}
}
pci_cleanup(pacc);

return false;
if (!found) g_qat_state.store(2, std::memory_order_release);
return found;
#endif
return false;
}
Expand All @@ -180,15 +192,23 @@ class LZ4Compressor : public BaseCompressor {
#ifdef ENABLE_QAT
if (check_qat()) {
pQat = new LZ4_qat_param();
qat_init(pQat);
qat_enable = true;
if (qat_init(pQat) == 0) {
qat_enable = true;
g_qat_state.store(1, std::memory_order_release);
/* nbatch() now returns DEFAULT_N_BATCH (was 1 when BaseCompressor::init ran). */
compressed_data.resize(DEFAULT_N_BATCH);
uncompressed_data.resize(DEFAULT_N_BATCH);
} else {
delete pQat;
pQat = nullptr;
g_qat_state.store(2, std::memory_order_release);
}
}
#endif
return 0;
}

int nbatch() override {
    // Report DEFAULT_N_BATCH while qat_enable is set; otherwise a batch of 1.
    if (qat_enable) {
        return DEFAULT_N_BATCH;
    }
    return 1;
}

Expand All @@ -197,13 +217,13 @@ class LZ4Compressor : public BaseCompressor {

int ret = 0;
#ifdef ENABLE_QAT
if (qat_enable) {
ret = LZ4_compress_qat(pQat, &raw_data[0], src_chunk_len, &compressed_data[0],
dst_chunk_len, n);
if (ret < 0) {
LOG_ERROR_RETURN(EFAULT, -1, "LZ4 compress data failed. (retcode: `).", ret);
}
return ret;
if (qat_enable) {
/* dst_chunk_len in = capacity, out = actual compressed bytes. */
for (size_t i = 0; i < nblock; i++) dst_chunk_len[i] = dst_buffer_capacity / nblock;
ret = LZ4_compress_qat(pQat, &uncompressed_data[0], src_chunk_len,
&compressed_data[0], dst_chunk_len, nblock);
if (ret == 0) return 0;
/* Any QAT failure falls through to the CPU loop below. */
}
#endif
for (size_t i = 0; i < nblock; i++) {
Expand All @@ -230,12 +250,12 @@ class LZ4Compressor : public BaseCompressor {
int ret = 0;
#ifdef ENABLE_QAT
if (qat_enable) {
/* dst_chunk_len in = capacity, out = actual decompressed bytes. */
for (size_t i = 0; i < n; i++) dst_chunk_len[i] = dst_buffer_capacity / n;
ret = LZ4_decompress_qat(pQat, &compressed_data[0], src_chunk_len,
&uncompressed_data[0], dst_chunk_len, n);
if (ret < 0) {
LOG_ERROR_RETURN(EFAULT, -1, "LZ4 decompress data failed. (retcode: `).", ret);
}
return ret;
if (ret == 0) return 0;
/* Any QAT failure falls through to the CPU loop below; not duplicated in lz4-qat. */
}
#endif
for (size_t i = 0; i < n; i++) {
Expand Down
46 changes: 0 additions & 46 deletions src/overlaybd/zfile/lz4/lz4-qat.c

This file was deleted.

Loading
Loading