Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions pooch/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,33 @@
"""

import abc
import bz2
import gzip
import lzma
import os
import shutil
import sys
import typing
from tarfile import TarFile
from zipfile import ZipFile

# bz2, gzip and lzma are optional features of the Python standard library: a
# Python interpreter can be built without them (e.g. when the bzip2/lzma/zlib
# development headers are missing). Importing them at module level would make
# *any* use of pooch fail on such interpreters, even when no decompression is
# needed. Guard the imports so the modules are only required when the matching
# Decompress method is actually used (see GH #468). The ``type: ignore`` marks
# the ``None`` fallback as intentional for the type checker.
try:
import bz2
except ImportError:
bz2 = None # type: ignore[assignment]
try:
import gzip
except ImportError:
gzip = None # type: ignore[assignment]
try:
import lzma
except ImportError:
lzma = None # type: ignore[assignment]

from .utils import get_logger


Expand Down Expand Up @@ -342,6 +359,14 @@ class Decompress:
"bzip2": bz2,
}
extensions: typing.ClassVar = {".xz": "lzma", ".gz": "gzip", ".bz2": "bzip2"}
# Name of the standard-library module backing each method, used to give a
# clear error when that (optional) module isn't available (see GH #468).
module_names: typing.ClassVar = {
"lzma": "lzma",
"xz": "lzma",
"gzip": "gzip",
"bzip2": "bz2",
}

def __init__(self, method="auto", name=None):
self.method = method
Expand Down Expand Up @@ -417,5 +442,18 @@ def _compression_module(self, fname):
if ext in {".zip", ".tar"}:
message = " ".join([message, error_archives])
raise ValueError(message)
return self.modules[self.extensions[ext]]
return self.modules[self.method]
method = self.extensions[ext]
else:
method = self.method
module = self.modules[method]
if module is None:
module_name = self.module_names[method]
message = (
f"Could not decompress '{fname}' because the '{module_name}' "
"module is not available in this Python installation. This "
f"usually means Python was built without '{module_name}' "
"support. Rebuild or reinstall Python with the required "
"support to use this compression method."
)
raise ValueError(message)
return module
46 changes: 46 additions & 0 deletions pooch/tests/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"""

import re
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -95,6 +97,50 @@ def test_decompress_fails():
assert "pooch.Unzip/Untar" in exception.value.args[0]


@pytest.mark.parametrize(
    ("method", "fname", "module_name"),
    [
        ("lzma", "data.xz", "lzma"),
        ("xz", "data.xz", "lzma"),
        ("gzip", "data.gz", "gzip"),
        ("bzip2", "data.bz2", "bz2"),
        ("auto", "data.xz", "lzma"),
        ("auto", "data.gz", "gzip"),
        ("auto", "data.bz2", "bz2"),
    ],
)
def test_decompress_unavailable_module(monkeypatch, method, fname, module_name):
    """
    A clear error should be raised when the compression module is unavailable.

    Every optional standard-library backend (lzma, gzip, bz2) is covered, both
    when the method is requested explicitly and when it is inferred from the
    file extension with ``method="auto"``.
    """
    # Simulate a Python built without the optional modules (see GH #468) by
    # replacing each entry of the method -> module mapping with None.
    # monkeypatch restores the original entries when the test finishes.
    for key in ("lzma", "xz", "gzip", "bzip2"):
        monkeypatch.setitem(Decompress.modules, key, None)
    processor = Decompress(method=method)
    # The error message must name the missing standard-library module, not the
    # pooch method name (e.g. "bz2" for the "bzip2" method).
    with pytest.raises(ValueError, match=re.escape(f"'{module_name}' module")):
        processor._compression_module(fname)


def test_processors_import_without_optional_modules():
    """
    Importing pooch must not fail when bz2/lzma are missing (see GH #468).

    The check runs in a fresh interpreter so that the import hook installed
    below cannot leak into the test session, and so that ``pooch.processors``
    is imported from scratch rather than taken from ``sys.modules``.
    """
    # Script executed by the child interpreter: it wraps builtins.__import__
    # so that importing lzma/bz2 (or their C accelerator modules) raises
    # ModuleNotFoundError, then imports pooch.processors and checks that the
    # guarded names fell back to None while gzip is still a real module.
    code = (
        "import builtins\n"
        "_real = builtins.__import__\n"
        "def _fake(name, *args, **kwargs):\n"
        "    if name in ('lzma', '_lzma', 'bz2', '_bz2'):\n"
        "        raise ModuleNotFoundError(\"No module named '%s'\" % name)\n"
        "    return _real(name, *args, **kwargs)\n"
        "builtins.__import__ = _fake\n"
        "import pooch.processors as p\n"
        "assert p.lzma is None and p.bz2 is None and p.gzip is not None\n"
        "print('IMPORT_OK')\n"
    )
    result = subprocess.run(
        [sys.executable, "-c", code],
        capture_output=True,
        text=True,
        check=False,
        # Fail with TimeoutExpired instead of hanging the whole test suite if
        # the child interpreter gets stuck.
        timeout=120,
    )
    # Surface the child's stderr in the assertion message to ease debugging.
    assert result.returncode == 0, result.stderr
    assert "IMPORT_OK" in result.stdout


@pytest.mark.network
@pytest.mark.parametrize(
"target_path", [None, "some_custom_path"], ids=["default_path", "custom_path"]
Expand Down
Loading