# Copyright 2023-2025 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=8 DISTUTILS_EXT=1 DISTUTILS_USE_PEP517=setuptools PYTHON_COMPAT=( python3_{10..13} ) inherit distutils-r1 multiprocessing # arrow.git: testing ARROW_DATA_GIT_HASH=4d209492d514c2d3cb2d392681b9aa00e6d8da1c # arrow.git: cpp/submodules/parquet-testing PARQUET_DATA_GIT_HASH=cb7a9674142c137367bf75a01b79c6e214a73199 DESCRIPTION="Python library for Apache Arrow" HOMEPAGE=" https://arrow.apache.org/ https://github.com/apache/arrow/ https://pypi.org/project/pyarrow/ " SRC_URI=" mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz test? ( https://github.com/apache/parquet-testing/archive/${PARQUET_DATA_GIT_HASH}.tar.gz -> parquet-testing-${PARQUET_DATA_GIT_HASH}.tar.gz https://github.com/apache/arrow-testing/archive/${ARROW_DATA_GIT_HASH}.tar.gz -> arrow-testing-${ARROW_DATA_GIT_HASH}.tar.gz ) " S="${WORKDIR}/apache-arrow-${PV}/python" LICENSE="Apache-2.0" SLOT="0" KEYWORDS="~amd64 ~arm64 ~riscv ~x86" IUSE="+parquet +snappy ssl" RDEPEND=" ~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,re2,snappy?,ssl?] >=dev-python/numpy-1.16.6:=[${PYTHON_USEDEP}] " BDEPEND=" test? ( dev-python/cffi[${PYTHON_USEDEP}] dev-python/hypothesis[${PYTHON_USEDEP}] dev-python/pandas[${PYTHON_USEDEP}] dev-python/pytz[${PYTHON_USEDEP}] dev-libs/apache-arrow[lz4,zlib] ) " EPYTEST_XDIST=1 distutils_enable_tests pytest src_prepare() { distutils-r1_src_prepare # cython's -Werror sed -i -e '/--warning-errors/d' CMakeLists.txt || die } src_compile() { export PYARROW_PARALLEL="$(makeopts_jobs)" export PYARROW_BUILD_VERBOSE=1 export PYARROW_CXXFLAGS="${CXXFLAGS}" export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0 export PYARROW_CMAKE_GENERATOR=Ninja export PYARROW_WITH_HDFS=1 if use parquet; then export PYARROW_WITH_DATASET=1 export PYARROW_WITH_PARQUET=1 use ssl && export PYARROW_WITH_PARQUET_ENCRYPTION=1 fi if use snappy; then export PYARROW_WITH_SNAPPY=1 fi distutils-r1_src_compile } python_test() { local EPYTEST_DESELECT=( # wtf? tests/test_fs.py::test_localfs_errors # these require apache-arrow with jemalloc that doesn't seem # to be supported by the Gentoo package tests/test_memory.py::test_env_var tests/test_memory.py::test_specific_memory_pools tests/test_memory.py::test_supported_memory_backends # require mimalloc tests/test_memory.py::test_memory_pool_factories # hypothesis health check failures # https://github.com/apache/arrow/issues/41318 tests/interchange/test_interchange_spec.py::test_dtypes tests/test_convert_builtin.py::test_array_to_pylist_roundtrip tests/test_feather.py::test_roundtrip tests/test_pandas.py::test_array_to_pandas_roundtrip tests/test_strategies.py::test_types tests/test_types.py::test_hashing # fragile memory tests tests/test_csv.py::TestSerialStreamingCSVRead::test_batch_lifetime tests/test_csv.py::TestThreadedStreamingCSVRead::test_batch_lifetime # takes forever, and manages to generate timedeltas over 64 bits tests/test_strategies.py "tests/test_array.py::test_pickling[builtin_pickle]" # scipy.sparse does not support dtype float16 "tests/test_sparse_tensor.py::test_sparse_coo_tensor_scipy_roundtrip[f2-arrow_type8]" ) cd "${T}" || die local -x PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 local -x PARQUET_TEST_DATA="${WORKDIR}/parquet-testing-${PARQUET_DATA_GIT_HASH}/data" local -x ARROW_TEST_DATA="${WORKDIR}/arrow-testing-${ARROW_DATA_GIT_HASH}/data" epytest --pyargs pyarrow }