From bcc6c521cbfb6fad0af65984b2b77ebf3d25d06e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 24 Jan 2023 18:18:28 +0100 Subject: [PATCH 1/2] Fix requote redirect url --- src/datasets/filesystems/compression.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/datasets/filesystems/compression.py b/src/datasets/filesystems/compression.py index 2754b418c21..4701cc41e0b 100644 --- a/src/datasets/filesystems/compression.py +++ b/src/datasets/filesystems/compression.py @@ -35,7 +35,12 @@ def __init__( super().__init__(self, **kwargs) # always open as "rb" since fsspec can then use the TextIOWrapper to make it work for "r" mode self.file = fsspec.open( - fo, mode="rb", protocol=target_protocol, compression=self.compression, **(target_options or {}) + fo, + mode="rb", + protocol=target_protocol, + compression=self.compression, + client_kwargs={"requote_redirect_url": False}, + **(target_options or {}), ) self.compressed_name = os.path.basename(self.file.path.split("::")[0]) self.uncompressed_name = ( From 0533516cd90868b0fcc0aad927bd652f3a97d7d3 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 31 Jan 2023 09:18:16 +0100 Subject: [PATCH 2/2] Add explanatory comment Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> --- src/datasets/filesystems/compression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/filesystems/compression.py b/src/datasets/filesystems/compression.py index 4701cc41e0b..32f8cc38438 100644 --- a/src/datasets/filesystems/compression.py +++ b/src/datasets/filesystems/compression.py @@ -39,7 +39,7 @@ def __init__( mode="rb", protocol=target_protocol, compression=self.compression, - client_kwargs={"requote_redirect_url": False}, + client_kwargs={"requote_redirect_url": False}, # see https://github.com/huggingface/datasets/pull/5459 **(target_options or {}), ) self.compressed_name = os.path.basename(self.file.path.split("::")[0])