#!/usr/bin/env python3

"""
Parts of this file were taken from the pyzmq project
(https://github.com/zeromq/pyzmq) which have been permitted for use under the
BSD license. Parts are from lxml (https://github.com/lxml/lxml)
"""

import argparse
import multiprocessing
import os
from os.path import join as pjoin
import platform
import shutil
import sys
from sysconfig import get_config_vars

import numpy
from pkg_resources import parse_version
from setuptools import (
    Command,
    Extension,
    setup,
)
from setuptools.command.build_ext import build_ext as _build_ext
import versioneer

cmdclass = versioneer.get_cmdclass()


def is_platform_windows():
    """Return True when ``sys.platform`` is ``"win32"`` or ``"cygwin"``."""
    return sys.platform in ("win32", "cygwin")


def is_platform_mac():
    """Return True when ``sys.platform`` is ``"darwin"``."""
    return sys.platform == "darwin"


# note: sync with pyproject.toml, environment.yml and asv.conf.json
min_cython_ver = "3.0"

try:
    from Cython import (
        Tempita,
        __version__ as _CYTHON_VERSION,
    )
    from Cython.Build import cythonize

    _CYTHON_INSTALLED = parse_version(_CYTHON_VERSION) >= parse_version(min_cython_ver)
except ImportError:
    _CYTHON_VERSION = None
    _CYTHON_INSTALLED = False
    cythonize = lambda x, *args, **kwargs: x  # dummy func


# Template (.pxi.in) files each Cython module depends on, relative to "pandas".
_pxi_dep_template = {
    "algos": ["_libs/algos_common_helper.pxi.in", "_libs/algos_take_helper.pxi.in"],
    "hashtable": [
        "_libs/hashtable_class_helper.pxi.in",
        "_libs/hashtable_func_helper.pxi.in",
        "_libs/khash_for_primitive_helper.pxi.in",
    ],
    "index": ["_libs/index_class_helper.pxi.in"],
    "sparse": ["_libs/sparse_op_helper.pxi.in"],
    "interval": ["_libs/intervaltree.pxi.in"],
}

# _pxifiles: flat list of every template path (prefixed with "pandas").
# _pxi_dep: module name -> list of its prefixed template paths.
_pxifiles = []
_pxi_dep = {}
for module, files in _pxi_dep_template.items():
    pxi_files = [pjoin("pandas", x) for x in files]
    _pxifiles.extend(pxi_files)
    _pxi_dep[module] = pxi_files


class build_ext(_build_ext):
    """build_ext that renders Tempita ``.pxi.in`` templates before building."""

    @classmethod
    def render_templates(cls, pxifiles) -> None:
        """
        Render each ``.pxi.in`` template in ``pxifiles`` to a ``.pxi`` file.

        A template is skipped when its output already exists and has a
        strictly newer modification time than the template.
        """
        for pxifile in pxifiles:
            # build pxifiles first, template extension must be .pxi.in
            assert pxifile.endswith(".pxi.in")
            # strip the trailing ".in"
            outfile = pxifile[:-3]

            if (
                os.path.exists(outfile)
                and os.stat(pxifile).st_mtime < os.stat(outfile).st_mtime
            ):
                # if .pxi.in is not updated, no need to output .pxi
                continue

            with open(pxifile, encoding="utf-8") as f:
                tmpl = f.read()
            pyxcontent = Tempita.sub(tmpl)

            with open(outfile, "w", encoding="utf-8") as f:
                f.write(pyxcontent)

    def build_extensions(self) -> None:
        """Render templates (only when Cython is usable), then build."""
        # if building from c files, don't need to
        # generate template output
        if _CYTHON_INSTALLED:
            self.render_templates(_pxifiles)

        super().build_extensions()


class CleanCommand(Command):
    """Custom command to clean the .so and .pyc files."""

    user_options = [("all", "a", "")]

    def initialize_options(self) -> None:
        """Collect the files (``_clean_me``) and trees (``_clean_trees``) to remove."""
        self.all = True
        self._clean_me = []
        self._clean_trees = []

        base = pjoin("pandas", "_libs", "src")
        parser = pjoin(base, "parser")
        vendored = pjoin(base, "vendored")
        dt = pjoin(base, "datetime")
        ujson_python = pjoin(vendored, "ujson", "python")
        ujson_lib = pjoin(vendored, "ujson", "lib")
        # Hand-written C sources that must survive the ".c" sweep below.
        self._clean_exclude = [
            pjoin(vendored, "numpy", "datetime", "np_datetime.c"),
            pjoin(vendored, "numpy", "datetime", "np_datetime_strings.c"),
            pjoin(dt, "date_conversions.c"),
            pjoin(parser, "tokenizer.c"),
            pjoin(parser, "io.c"),
            pjoin(ujson_python, "ujson.c"),
            pjoin(ujson_python, "objToJSON.c"),
            pjoin(ujson_python, "JSONtoObj.c"),
            pjoin(ujson_lib, "ultrajsonenc.c"),
            pjoin(ujson_lib, "ultrajsondec.c"),
            pjoin(dt, "pd_datetime.c"),
            pjoin(parser, "pd_parser.c"),
        ]

        for root, dirs, files in os.walk("pandas"):
            for f in files:
                filepath = pjoin(root, f)
                if filepath in self._clean_exclude:
                    continue

                if os.path.splitext(f)[-1] in (
                    ".pyc",
                    ".so",
                    ".o",
                    ".pyo",
                    ".pyd",
                    ".c",
                    ".cpp",
                    ".orig",
                ):
                    self._clean_me.append(filepath)
            # extend (not append): each entry must be a path, not a generator
            # object, otherwise shutil.rmtree() in run() raises TypeError.
            self._clean_trees.extend(
                pjoin(root, d) for d in dirs if d == "__pycache__"
            )

        # clean the generated pxi files
        for pxifile in _pxifiles:
            pxifile_replaced = pxifile.replace(".pxi.in", ".pxi")
            self._clean_me.append(pxifile_replaced)

        # extend (not append) for the same reason as above
        self._clean_trees.extend(d for d in ("build", "dist") if os.path.exists(d))

    def finalize_options(self) -> None:
        pass

    def run(self) -> None:
        """Remove collected files and trees, ignoring ``OSError`` (best effort)."""
        for clean_me in self._clean_me:
            try:
                os.unlink(clean_me)
            except OSError:
                pass
        for clean_tree in self._clean_trees:
            try:
                shutil.rmtree(clean_tree)
            except OSError:
                pass


# we need to inherit from the versioneer
# class as it encodes the version info
sdist_class = cmdclass["sdist"]


class CheckSDist(sdist_class):
    """Custom sdist that ensures Cython has compiled all pyx files to c."""

    _pyxfiles = [
        "pandas/_libs/arrays.pyx",
        "pandas/_libs/lib.pyx",
        "pandas/_libs/hashtable.pyx",
        "pandas/_libs/tslib.pyx",
        "pandas/_libs/index.pyx",
        "pandas/_libs/internals.pyx",
        "pandas/_libs/algos.pyx",
        "pandas/_libs/join.pyx",
        "pandas/_libs/indexing.pyx",
        "pandas/_libs/interval.pyx",
        "pandas/_libs/hashing.pyx",
        "pandas/_libs/missing.pyx",
        "pandas/_libs/testing.pyx",
        "pandas/_libs/sparse.pyx",
        "pandas/_libs/ops.pyx",
        "pandas/_libs/parsers.pyx",
        "pandas/_libs/tslibs/base.pyx",
        "pandas/_libs/tslibs/ccalendar.pyx",
        "pandas/_libs/tslibs/dtypes.pyx",
        "pandas/_libs/tslibs/period.pyx",
        "pandas/_libs/tslibs/strptime.pyx",
        "pandas/_libs/tslibs/np_datetime.pyx",
        "pandas/_libs/tslibs/timedeltas.pyx",
        "pandas/_libs/tslibs/timestamps.pyx",
        "pandas/_libs/tslibs/timezones.pyx",
        "pandas/_libs/tslibs/conversion.pyx",
        "pandas/_libs/tslibs/fields.pyx",
        "pandas/_libs/tslibs/offsets.pyx",
        "pandas/_libs/tslibs/parsing.pyx",
        "pandas/_libs/tslibs/tzconversion.pyx",
        "pandas/_libs/tslibs/vectorized.pyx",
        "pandas/_libs/window/indexers.pyx",
        "pandas/_libs/writers.pyx",
        "pandas/_libs/sas.pyx",
        "pandas/_libs/byteswap.pyx",
    ]

    _cpp_pyxfiles = [
        "pandas/_libs/window/aggregations.pyx",
    ]

    def initialize_options(self) -> None:
        sdist_class.initialize_options(self)

    def run(self) -> None:
        """Run the ``cython`` command if registered, else check generated sources exist."""
        if "cython" in cmdclass:
            self.run_command("cython")
        else:
            # If we are not running cython then
            # compile the extensions correctly
            pyx_files = [(self._pyxfiles, "c"), (self._cpp_pyxfiles, "cpp")]

            for pyxfiles, extension in pyx_files:
                for pyxfile in pyxfiles:
                    # "foo.pyx"[:-3] == "foo." so this yields "foo.c" / "foo.cpp"
                    sourcefile = pyxfile[:-3] + extension
                    msg = (
                        f"{extension}-source file '{sourcefile}' not found.\n"
                        "Run 'setup.py cython' before sdist."
                    )
                    assert os.path.isfile(sourcefile), msg
        sdist_class.run(self)


class CheckingBuildExt(build_ext):
    """
    Subclass build_ext to get clearer report if Cython is necessary.
    """

    def check_cython_extensions(self, extensions) -> None:
        """Raise ``Exception`` if any source file of any extension is missing."""
        for ext in extensions:
            for src in ext.sources:
                if not os.path.exists(src):
                    print(f"{ext.name}: -> [{ext.sources}]")
                    raise Exception(
                        f"""Cython-generated file '{src}' not found.
Cython is required to compile pandas from a development branch.
Please install Cython or download a release package of pandas.
"""
                    )

    def build_extensions(self) -> None:
        self.check_cython_extensions(self.extensions)
        build_ext.build_extensions(self)


class CythonCommand(build_ext):
    """
    Custom command subclassed from Cython.Distutils.build_ext
    to compile pyx->c, and stop there. All this does is override the
    C-compile method build_extension() with a no-op.
    """

    def build_extension(self, ext) -> None:
        pass


class DummyBuildSrc(Command):
    """numpy's build_src command interferes with Cython's build_ext."""

    user_options = []

    def initialize_options(self) -> None:
        self.py_modules_dict = {}

    def finalize_options(self) -> None:
        pass

    def run(self) -> None:
        pass


cmdclass["clean"] = CleanCommand
cmdclass["build_ext"] = CheckingBuildExt

if _CYTHON_INSTALLED:
    suffix = ".pyx"
    cmdclass["cython"] = CythonCommand
else:
    suffix = ".c"
    cmdclass["build_src"] = DummyBuildSrc

# ----------------------------------------------------------------------
# Preparation of compiler arguments

debugging_symbols_requested = "--with-debugging-symbols" in sys.argv
if debugging_symbols_requested:
    # strip our custom flag so setuptools does not see it
    sys.argv.remove("--with-debugging-symbols")


if sys.byteorder == "big":
    endian_macro = [("__BIG_ENDIAN__", "1")]
else:
    endian_macro = [("__LITTLE_ENDIAN__", "1")]


extra_compile_args = []
extra_link_args = []
if is_platform_windows():
    if debugging_symbols_requested:
        extra_compile_args.append("/Z7")
        extra_link_args.append("/DEBUG")
else:
    # PANDAS_CI=1 is set in CI
    if os.environ.get("PANDAS_CI", "0") == "1":
        extra_compile_args.append("-Werror")
    if debugging_symbols_requested:
        extra_compile_args.append("-g3")
        extra_compile_args.append("-UNDEBUG")
        extra_compile_args.append("-O0")

# Build for at least macOS 10.9 when compiling on a 10.9 system or above,
# overriding CPython distutils behaviour which is to target the version that
# python was built for. This may be overridden by setting
# MACOSX_DEPLOYMENT_TARGET before calling setup.py
if is_platform_mac():
    if "MACOSX_DEPLOYMENT_TARGET" not in os.environ:
        current_system = platform.mac_ver()[0]
        python_target = get_config_vars().get(
            "MACOSX_DEPLOYMENT_TARGET", current_system
        )
        target_macos_version = "10.9"
        parsed_macos_version = parse_version(target_macos_version)
        if (
            parse_version(str(python_target))
            < parsed_macos_version
            <= parse_version(current_system)
        ):
            os.environ["MACOSX_DEPLOYMENT_TARGET"] = target_macos_version

    if sys.version_info[:2] == (3, 8):  # GH 33239
        extra_compile_args.append("-Wno-error=deprecated-declarations")

    # https://github.com/pandas-dev/pandas/issues/35559
    extra_compile_args.append("-Wno-error=unreachable-code")

# enable coverage by building cython files by setting the environment variable
# "PANDAS_CYTHON_COVERAGE" (with a Truthy value) or by running build_ext
# with `--with-cython-coverage` enabled
linetrace = os.environ.get("PANDAS_CYTHON_COVERAGE", False)
if "--with-cython-coverage" in sys.argv:
    linetrace = True
    sys.argv.remove("--with-cython-coverage")

# Note: if not using `cythonize`, coverage can be enabled by
# pinning `ext.cython_directives = directives` to each ext in extensions.
# github.com/cython/cython/wiki/enhancements-compilerdirectives#in-setuppy
directives = {"linetrace": False, "language_level": 3, "always_allow_keywords": True}
macros = []
if linetrace:
    # https://pypkg.com/pypi/pytest-cython/f/tests/example-project/setup.py
    directives["linetrace"] = True
    macros = [("CYTHON_TRACE", "1"), ("CYTHON_TRACE_NOGIL", "1")]

# silence build warnings about deprecated API usage
# we can't do anything about these warnings because they stem from
# cython+numpy version mismatches.
macros.append(("NPY_NO_DEPRECATED_API", "0"))


# ----------------------------------------------------------------------
# Specification of Dependencies


# TODO(cython#4518): Need to check to see if e.g. `linetrace` has changed and
#  possibly re-compile.
def maybe_cythonize(extensions, *args, **kwargs):
    """
    Render tempita templates before calling cythonize. This is skipped for

    * clean
    * sdist

    Raises
    ------
    RuntimeError
        If Cython is missing or older than ``min_cython_ver`` (and the command
        is neither ``clean`` nor ``sdist``).
    """
    if "clean" in sys.argv or "sdist" in sys.argv:
        # See https://github.com/cython/cython/issues/1495
        return extensions

    elif not _CYTHON_INSTALLED:
        # GH#28836 raise a helpful error message
        if _CYTHON_VERSION:
            raise RuntimeError(
                f"Cannot cythonize with old Cython version ({_CYTHON_VERSION} "
                f"installed, needs {min_cython_ver})"
            )
        raise RuntimeError("Cannot cythonize without Cython installed.")

    # reuse any parallel arguments provided for compilation to cythonize
    parser = argparse.ArgumentParser()
    parser.add_argument("--parallel", "-j", type=int, default=1)
    parsed, _ = parser.parse_known_args()

    kwargs["nthreads"] = parsed.parallel
    build_ext.render_templates(_pxifiles)
    if debugging_symbols_requested:
        kwargs["gdb_debug"] = True

    return cythonize(extensions, *args, **kwargs)


def srcpath(name=None, suffix=".pyx", subdir="src"):
    """Return ``pandas/<subdir>/<name><suffix>`` joined with ``os.path.join``."""
    return pjoin("pandas", subdir, name + suffix)


lib_depends = ["pandas/_libs/include/pandas/parse_helper.h"]

tseries_depends = [
    "pandas/_libs/include/pandas/datetime/pd_datetime.h",
]

# Extension name (relative to "pandas.") -> build options. Recognised keys,
# as read by the loop below: "pyxfile", "depends", "sources", "language",
# "suffix" and "macros".
ext_data = {
    "_libs.algos": {
        "pyxfile": "_libs/algos",
        "depends": _pxi_dep["algos"],
    },
    "_libs.arrays": {"pyxfile": "_libs/arrays"},
    "_libs.groupby": {"pyxfile": "_libs/groupby"},
    "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []},
    "_libs.hashtable": {
        "pyxfile": "_libs/hashtable",
        "depends": (
            [
                "pandas/_libs/include/pandas/vendored/klib/khash_python.h",
                "pandas/_libs/include/pandas/vendored/klib/khash.h",
            ]
            + _pxi_dep["hashtable"]
        ),
    },
    "_libs.index": {
        "pyxfile": "_libs/index",
        "depends": _pxi_dep["index"],
    },
    "_libs.indexing": {"pyxfile": "_libs/indexing"},
    "_libs.internals": {"pyxfile": "_libs/internals"},
    "_libs.interval": {
        "pyxfile": "_libs/interval",
        "depends": _pxi_dep["interval"],
    },
    "_libs.join": {"pyxfile": "_libs/join"},
    "_libs.lib": {
        "pyxfile": "_libs/lib",
        "depends": lib_depends + tseries_depends,
    },
    "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends},
    "_libs.parsers": {
        "pyxfile": "_libs/parsers",
        "depends": [
            "pandas/_libs/src/parser/tokenizer.h",
            "pandas/_libs/src/parser/io.h",
            "pandas/_libs/src/pd_parser.h",
        ],
    },
    "_libs.ops": {"pyxfile": "_libs/ops"},
    "_libs.ops_dispatch": {"pyxfile": "_libs/ops_dispatch"},
    "_libs.properties": {"pyxfile": "_libs/properties"},
    "_libs.reshape": {"pyxfile": "_libs/reshape", "depends": []},
    "_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]},
    "_libs.tslib": {
        "pyxfile": "_libs/tslib",
        "depends": tseries_depends,
    },
    "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"},
    "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"},
    "_libs.tslibs.dtypes": {"pyxfile": "_libs/tslibs/dtypes"},
    "_libs.tslibs.conversion": {
        "pyxfile": "_libs/tslibs/conversion",
        "depends": tseries_depends,
    },
    "_libs.tslibs.fields": {
        "pyxfile": "_libs/tslibs/fields",
        "depends": tseries_depends,
    },
    "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"},
    "_libs.tslibs.np_datetime": {
        "pyxfile": "_libs/tslibs/np_datetime",
        "depends": tseries_depends,
    },
    "_libs.tslibs.offsets": {
        "pyxfile": "_libs/tslibs/offsets",
        "depends": tseries_depends,
    },
    "_libs.tslibs.parsing": {
        "pyxfile": "_libs/tslibs/parsing",
        "sources": ["pandas/_libs/src/parser/tokenizer.c"],
    },
    "_libs.tslibs.period": {
        "pyxfile": "_libs/tslibs/period",
        "depends": tseries_depends,
    },
    "_libs.tslibs.strptime": {
        "pyxfile": "_libs/tslibs/strptime",
        "depends": tseries_depends,
    },
    "_libs.tslibs.timedeltas": {
        "pyxfile": "_libs/tslibs/timedeltas",
        "depends": tseries_depends,
    },
    "_libs.tslibs.timestamps": {
        "pyxfile": "_libs/tslibs/timestamps",
        "depends": tseries_depends,
    },
    "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"},
    "_libs.tslibs.tzconversion": {
        "pyxfile": "_libs/tslibs/tzconversion",
        "depends": tseries_depends,
    },
    "_libs.tslibs.vectorized": {
        "pyxfile": "_libs/tslibs/vectorized",
        "depends": tseries_depends,
    },
    "_libs.testing": {"pyxfile": "_libs/testing"},
    "_libs.window.aggregations": {
        "pyxfile": "_libs/window/aggregations",
        "language": "c++",
        "suffix": ".cpp",
        "depends": ["pandas/_libs/include/pandas/skiplist.h"],
    },
    "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"},
    "_libs.writers": {"pyxfile": "_libs/writers"},
    "_libs.sas": {"pyxfile": "_libs/sas"},
    "_libs.byteswap": {"pyxfile": "_libs/byteswap"},
}

extensions = []

for name, data in ext_data.items():
    # With Cython available build from .pyx; otherwise from the pre-generated
    # .c (or the per-extension "suffix", e.g. ".cpp").
    source_suffix = suffix if suffix == ".pyx" else data.get("suffix", ".c")

    sources = [srcpath(data["pyxfile"], suffix=source_suffix, subdir="")]

    sources.extend(data.get("sources", []))

    include = ["pandas/_libs/include", numpy.get_include()]

    undef_macros = []

    if (
        sys.platform == "zos"
        and data.get("language") == "c++"
        and os.path.basename(os.environ.get("CXX", "/bin/xlc++")) in ("xlc", "xlc++")
    ):
        # NOTE(review): these appends mutate the shared `macros` and
        # `extra_compile_args` lists, so they also apply to every extension
        # created afterwards -- confirm this is intended.
        data.get("macros", macros).append(("__s390__", "1"))
        extra_compile_args.append("-qlanglvl=extended0x:nolibext")
        undef_macros.append("_POSIX_THREADS")

    obj = Extension(
        f"pandas.{name}",
        sources=sources,
        depends=data.get("depends", []),
        include_dirs=include,
        language=data.get("language", "c"),
        define_macros=data.get("macros", macros),
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
        undef_macros=undef_macros,
    )

    extensions.append(obj)

# ----------------------------------------------------------------------
# ujson

if suffix == ".pyx":
    # undo dumb setuptools bug clobbering .pyx sources back to .c
    for ext in extensions:
        if ext.sources[0].endswith((".c", ".cpp")):
            root, _ = os.path.splitext(ext.sources[0])
            ext.sources[0] = root + suffix

ujson_ext = Extension(
    "pandas._libs.json",
    depends=[
        "pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h",
        "pandas/_libs/include/pandas/datetime/pd_datetime.h",
    ],
    sources=(
        [
            "pandas/_libs/src/vendored/ujson/python/ujson.c",
            "pandas/_libs/src/vendored/ujson/python/objToJSON.c",
            "pandas/_libs/src/vendored/ujson/python/JSONtoObj.c",
            "pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c",
            "pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c",
        ]
    ),
    include_dirs=[
        "pandas/_libs/include",
        numpy.get_include(),
    ],
    extra_compile_args=(extra_compile_args),
    extra_link_args=extra_link_args,
    define_macros=macros,
)


extensions.append(ujson_ext)

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# pd_datetime
pd_dt_ext = Extension(
    "pandas._libs.pandas_datetime",
    # Same header path that tseries_depends and ujson_ext use.
    depends=["pandas/_libs/include/pandas/datetime/pd_datetime.h"],
    sources=(
        [
            "pandas/_libs/src/vendored/numpy/datetime/np_datetime.c",
            "pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c",
            "pandas/_libs/src/datetime/date_conversions.c",
            "pandas/_libs/src/datetime/pd_datetime.c",
        ]
    ),
    include_dirs=[
        "pandas/_libs/include",
        numpy.get_include(),
    ],
    extra_compile_args=(extra_compile_args),
    extra_link_args=extra_link_args,
    define_macros=macros,
)


extensions.append(pd_dt_ext)

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# pd_parser
pd_parser_ext = Extension(
    "pandas._libs.pandas_parser",
    depends=["pandas/_libs/include/pandas/parser/pd_parser.h"],
    sources=(
        [
            "pandas/_libs/src/parser/tokenizer.c",
            "pandas/_libs/src/parser/io.c",
            "pandas/_libs/src/parser/pd_parser.c",
        ]
    ),
    include_dirs=[
        "pandas/_libs/include",
    ],
    extra_compile_args=(extra_compile_args),
    extra_link_args=extra_link_args,
    define_macros=macros,
)


extensions.append(pd_parser_ext)


# ----------------------------------------------------------------------


if __name__ == "__main__":
    # Freeze to support parallel compilation when using spawn instead of fork
    multiprocessing.freeze_support()
    setup(
        version=versioneer.get_version(),
        ext_modules=maybe_cythonize(extensions, compiler_directives=directives),
        cmdclass=cmdclass,
    )