Extract and load data directly from a tarball

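This is sample code that downloads a tarball (.tar.gz / .tgz), reads one file from it in memory without unpacking the archive to disk, and loads that file's data into a NumPy array. You may also load the same file into a pandas DataFrame.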

import tarfile
from io import BytesIO
from urllib.request import urlopen

import numpy as np
# URL of the tarball to download (placeholder -- replace with a real URL).
url = 'url/to/tarfile.tgz'
# Read the whole archive into memory. The `with` block closes the HTTP
# response as soon as its bytes have been copied into the buffer.
with urlopen(url) as response:
    b = BytesIO(response.read())
# Name of the member to load. tarfile looks this name up among the archive's
# own members, so it must be the path as stored inside the tarball
# (placeholder -- replace with the real member name).
fpath = 'local_folder_path/to/extract/data'

# mode='r' lets tarfile detect the compression (gzip, bz2, ...) on its own.
with tarfile.open(mode='r', fileobj=b) as archive:
    # Raises KeyError if no member named `fpath` exists in the archive.
    member_file = archive.extractfile(fpath)
    # extractfile() returns None for members that are not regular files or
    # links (e.g. directories); fail with a clear message instead of handing
    # None to np.loadtxt.
    if member_file is None:
        raise ValueError(
            'archive member %r is not a regular file and cannot be read' % fpath
        )
    # Close the member's file object once the data has been parsed.
    with member_file:
        numpy_data = np.loadtxt(member_file, delimiter=',')


