[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "unitxt"
# The version is not hard-coded here; setuptools reads it from the attribute
# configured under [tool.setuptools.dynamic].
dynamic = ["version"]
description = "Load any mixture of text to text data in one line of code"
authors = [{ name = "IBM Research", email = "elron.bandel@ibm.com" }]
requires-python = ">=3.8"
license = { file = "LICENSE" }
readme = "README.md"
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]
dependencies = [
    "datasets>=2.16.0",
    "evaluate",
    "scipy>=1.10.1",
    "diskcache",
]

[project.urls]
Homepage = "https://www.unitxt.ai"
Documentation = "https://www.unitxt.ai/en/latest/documentation.html"
Repository = "https://github.com/ibm/unitxt"

# Disabled earlier form of the dynamic version lookup; the active definition
# is the [tool.setuptools.dynamic] table further down.
# [tool.setuptools.dynamic]
# version = {attr = "src.unitxt.version.version"}

[tool.setuptools]
package-dir = { "" = "src" }
include-package-data = true

# Table form of `packages = {find = {where = ["src"]}}`.
[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
unitxt = [
    "catalog/**/*.json",
    "ui/banner.png",
    "assistant/embeddings.npz",
    "assistant/metadata.parquet",
]

[tool.setuptools.dynamic]
version = { attr = "unitxt.version.version" }

[project.optional-dependencies]
dev = [
    "ruff",
    "pre-commit",
    "detect-secrets",
    "tomli",
    "codespell",
    "fuzzywuzzy",
    "httpretty",
    "psutil",
]
docs = [
    "sphinx_rtd_theme",
    "piccolo_theme",
    "sphinxext-opengraph",
    "datasets>=2.16.0,<4.0",
    "evaluate",
    "nltk",
    "rouge_score",
    "scikit-learn",
    "jiwer",
    "editdistance",
    "fuzzywuzzy",
    "pydantic",
]
helm = [
    "crfm-helm[unitxt]>=0.5.3",
]
service = [
    "torch==1.12.1",
    "fastapi==0.109.0",
    "uvicorn[standard]==0.27.0.post1",
    "python-jose[cryptography]==3.3.0",
    "transformers",
]
tests = [
    "bert_score",
    "transformers",
    "sentence_transformers",
    "ibm-cos-sdk",
    "kaggle==1.6.14",
    "opendatasets",
    "httpretty~=1.1.4",
    "editdistance",
    "rouge-score",
    "nltk",
    "sacrebleu[ko,ja]",
    "scikit-learn<=1.5.2",
    "jiwer",
    "conllu",
    "llama-index-core",
    "llama-index-llms-openai",
    "pytrec-eval",
    "SentencePiece",
    "fuzzywuzzy",
    "openai",
    "ibm-generative-ai",
    "bs4",
    "tenacity==8.3.0",
    "accelerate",
    "func_timeout==4.3.5",
    "Wikipedia-API",
    "sqlglot",
    "sqlparse",
    "diskcache",
    "pydantic",
    "jsonschema_rs",
]
ui = [
    "gradio",
    "transformers",
]
text2sql = [
    "sqlglot",
    "func_timeout==4.3.5",
    "sqlparse",
    "tabulate",
]
watsonx = [
    "ibm-watsonx-ai==1.2.10",
]
inference-tests = [
    "litellm>=1.52.9",
    "tenacity",
    "diskcache",
    "numpy==1.26.4",
    "ollama",
]
assistant = [
    "streamlit",
    "watchdog",
    "litellm",
]
remote_inference = [
    "litellm>=1.52.9",
    "tenacity",
    "diskcache",
]
local_inference = [
    "transformers",
    "torch",
    "accelerate",
]
bluebench = [
    "unitxt[remote_inference]",
    "unitxt[local_inference]",
    "conllu",
    "scikit-learn",
    "sympy",
    "bert_score",
    "nltk",
    "rouge_score",
    "sacrebleu[ko]",
]
# Combine extras by referencing other groups. Every bracketed name must be an
# extra defined in this table: an undefined one (previously "unitxt[base]")
# installs nothing and only makes pip warn about a missing extra. The core
# dependencies are installed anyway through any of the self-references.
# inference-tests, remote_inference, local_inference and bluebench are not
# part of this list.
all = [
    "unitxt[dev]",
    "unitxt[docs]",
    "unitxt[helm]",
    "unitxt[service]",
    "unitxt[tests]",
    "unitxt[ui]",
    "unitxt[watsonx]",
    "unitxt[assistant]",
    "unitxt[text2sql]",
]

[project.scripts]
unitxt-explore = "unitxt.ui:launch"
unitxt-assistant = "unitxt.assistant:launch"
unitxt-metrics-service = "unitxt.service.metrics.main:start_metrics_http_service"
unitxt-evaluate = "unitxt.evaluate_cli:main"
unitxt-summarize = "unitxt.evaluate_cli:summarize_cli"

[tool.ruff]
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]
line-length = 88
indent-width = 4
# Matches the lowest interpreter allowed by requires-python (>=3.8).
target-version = "py38"

[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true

# Per-path rule exemptions: each key is a file glob, each value the list of
# ruff rule codes silenced for the matching files.
[tool.ruff.lint.per-file-ignores]
"src/*" = ["TID252"]
".github/*" = ["TID251"]
".vscode/*" = ["TID251"]
"tests/*" = ["TID251"]
"utils/*" = ["TID251"]
"src/unitxt/api.py" = ["B904"]
"src/unitxt/metrics.py" = ["C901"]
"src/unitxt/loaders.py" = ["C901"]
"src/unitxt/__init__.py" = ["F811", "F401"]
"src/unitxt/metric.py" = ["F811", "F401"]
"src/unitxt/dataset.py" = ["F811", "F401"]
"src/unitxt/blocks.py" = ["F811", "F401"]
"tests/library/test_loaders.py" = ["N802", "N803", "RUF015"] "tests/library/test_dataclass.py" = ["F811", "E731"] "src/unitxt/validate.py" = ["B024"] "src/unitxt/standard.py" = ["C901"] "src/unitxt/type_utils.py" = ["C901"] "src/unitxt/metric_utils.py" = ["C901"] "src/unitxt/dataclass.py" = ["C901"] "src/unitxt/operators.py" = ["C901"] "src/unitxt/utils.py" = ["C901", "N818"] "docs/conf.py" = ["E402"] "prepare/cards/attaq_500.py" = ["RUF001"] "prepare/instructions/models/llama.py" = ["RUF001"] "utils/hf/prepare_dataset.py" = ["T201"] "utils/hf/prepare_metric.py" = ["T201"] "utils/compare_unitxt_datasets_between_versions.py" = ["C901"] "performance/*.py" = ["T201"] "src/unitxt/llm_as_judge.py" = ["F401", "C901"] "examples/*.py" = ["T201"] "tests/library/test_retry.py" = ["B904", "N818"] "src/unitxt/tool_calling.py" = ["C901"] [tool.ruff.lint] extend-select = [ "UP", "D", "F", "E", "B", "C", "R", "T", "TID25", "W", "RUF100", "I", "G", "N", "Q", "RUF", ] ignore = ["E501", "E203", "E722", "D101", "D102", "D103", "D100", "D104", "D105", "D106", "D107", "RUF012", "G004"] fixable = ["ALL"] unfixable = [] dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.format] quote-style = "double" indent-style = "space" skip-magic-trailing-comma = false line-ending = "auto" [tool.ruff.lint.pydocstyle] convention = "google" [tool.ruff.lint.flake8-bugbear] extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.Query", "fastapi.params.Query"] [tool.ruff.lint.flake8-tidy-imports.banned-api] "src".msg = "Use unitxt outside src/ and relative imports inside src/ and install unitxt from source with `pip install -e '.[dev]'`." [tool.codespell] ignore-words-list = 'rouge,ot,ans,nd,cann,som,tha,vie,ment,criterias,atleast,te' check-filenames = true check-hidden = false regex = "(?