[build-system]
# Build backend and the packages needed to run it.
# The lower bound on setuptools reflects features this file relies on:
# [project] metadata declared in pyproject.toml and recursive `**` globs in
# [tool.setuptools.package-data]. Without a bound, a build front end may pick
# up an older setuptools that does not handle them.
requires = ["setuptools>=62.3", "wheel"]
build-backend = "setuptools.build_meta"

[project]
# Core package metadata.
name = "unitxt"
description = "Load any mixture of text to text data in one line of code"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.8"
authors = [{ name = "IBM Research", email = "elron.bandel@ibm.com" }]
classifiers = [
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
# Requirements installed together with the base package.
dependencies = [
    "datasets>=2.16.0",
    "diskcache",
    "evaluate",
    "scipy>=1.10.1",
]
# The version is not written here; it is listed as dynamic and is configured
# in the [tool.setuptools.dynamic] table of this file.
dynamic = ["version"]

[project.urls]
# Project links published with the package metadata.
Documentation = "https://www.unitxt.ai/en/latest/documentation.html"
Homepage = "https://www.unitxt.ai"
Repository = "https://github.com/ibm/unitxt"

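# Disabled alternative that addresses the version attribute through the "src"
# prefix. The active setting is the [tool.setuptools.dynamic] table further
# down in this file.
# [tool.setuptools.dynamic]
# version = {attr = "src.unitxt.version.version"}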

[tool.setuptools]
include-package-data = true

# The root package namespace ("") lives in the `src` directory.
[tool.setuptools.package-dir]
"" = "src"

# Packages are discovered automatically by searching `src`.
[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
# Non-Python files of the `unitxt` package, given as glob patterns.
unitxt = [
    "catalog/**/*.json",
    "ui/banner.png",
    "assistant/embeddings.npz",
    "assistant/metadata.parquet",
]

[tool.setuptools.dynamic]
# Source of the dynamic `version` field: the attribute path below.
version.attr = "unitxt.version.version"

[project.optional-dependencies]
# `dev` extra; the flake8-tidy-imports message in this file tells contributors
# to install it from source with `pip install -e '.[dev]'`.
dev = [
    "codespell",
    "detect-secrets",
    "fuzzywuzzy",
    "httpretty",
    "pre-commit",
    "psutil",
    "ruff",
    "tomli",
]
# `docs` extra. Note that `datasets` carries an upper bound here that the base
# dependency list does not have.
docs = [
    "datasets>=2.16.0,<4.0",
    "editdistance",
    "evaluate",
    "fuzzywuzzy",
    "jiwer",
    "nltk",
    "piccolo_theme",
    "pydantic",
    "rouge_score",
    "scikit-learn",
    "sphinx_rtd_theme",
    "sphinxext-opengraph",
]
# `helm` extra: a single requirement, which itself requests the `unitxt` extra
# of crfm-helm.
helm = ["crfm-helm[unitxt]>=0.5.3"]
# `service` extra; every requirement except `transformers` is pinned to an
# exact version.
service = [
    "fastapi==0.109.0",
    "python-jose[cryptography]==3.3.0",
    "torch==1.12.1",
    "transformers",
    "uvicorn[standard]==0.27.0.post1",
]
# `tests` extra, listed alphabetically (case-insensitive).
tests = [
    "accelerate",
    "bert_score",
    "bs4",
    "conllu",
    "diskcache",
    "editdistance",
    "func_timeout==4.3.5",
    "fuzzywuzzy",
    "httpretty~=1.1.4",
    "ibm-cos-sdk",
    "ibm-generative-ai",
    "jiwer",
    "jsonschema_rs",
    "kaggle==1.6.14",
    "llama-index-core",
    "llama-index-llms-openai",
    "nltk",
    "openai",
    "opendatasets",
    "pydantic",
    "pytrec-eval",
    "rouge-score",
    "sacrebleu[ko,ja]",
    "scikit-learn<=1.5.2",
    "sentence_transformers",
    "SentencePiece",
    "sqlglot",
    "sqlparse",
    "tenacity==8.3.0",
    "transformers",
    "Wikipedia-API",
]
# `ui` extra.
ui = ["gradio", "transformers"]
# `text2sql` extra.
text2sql = [
    "func_timeout==4.3.5",
    "sqlglot",
    "sqlparse",
    "tabulate",
]
# `watsonx` extra: one exactly pinned requirement.
watsonx = ["ibm-watsonx-ai==1.2.10"]
# `inference-tests` extra; `numpy` is pinned to an exact version here.
inference-tests = [
    "diskcache",
    "litellm>=1.52.9",
    "numpy==1.26.4",
    "ollama",
    "tenacity",
]
# `assistant` extra.
assistant = [
    "litellm",
    "streamlit",
    "watchdog",
]
# `remote_inference` extra.
remote_inference = [
    "diskcache",
    "litellm>=1.52.9",
    "tenacity",
]
# `local_inference` extra.
local_inference = [
    "accelerate",
    "torch",
    "transformers",
]
# `bluebench` extra: pulls in two other extras of this package by
# self-reference, followed by its own requirements.
bluebench = [
    "unitxt[local_inference]",
    "unitxt[remote_inference]",
    "bert_score",
    "conllu",
    "nltk",
    "rouge_score",
    "sacrebleu[ko]",
    "scikit-learn",
    "sympy",
]


# Aggregate extra built by self-referencing other extras of this package.
# Every name referenced here must be an extra defined in this table; a
# reference to an undefined extra installs nothing and only makes installers
# emit a warning.
# Not included in this aggregate: inference-tests, remote_inference,
# local_inference, bluebench.
all = [
    "unitxt[dev]",
    "unitxt[docs]",
    "unitxt[helm]",
    "unitxt[service]",
    "unitxt[tests]",
    "unitxt[ui]",
    "unitxt[watsonx]",
    "unitxt[assistant]",
    "unitxt[text2sql]",
]

[project.scripts]
# Console commands; each maps a command name to a `module:callable` target.
unitxt-assistant = "unitxt.assistant:launch"
unitxt-evaluate = "unitxt.evaluate_cli:main"
unitxt-explore = "unitxt.ui:launch"
unitxt-metrics-service = "unitxt.service.metrics.main:start_metrics_http_service"
unitxt-summarize = "unitxt.evaluate_cli:summarize_cli"

[tool.ruff]
# Target Python version, line width and indentation width.
target-version = "py38"
line-length = 88
indent-width = 4

# Paths excluded from ruff.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

[tool.ruff.lint.pyupgrade]
# Option for the pyupgrade ("UP") rules enabled in [tool.ruff.lint].
# NOTE(review): per the Ruff settings reference this keeps runtime typing
# syntax (e.g. `List[int]`, `Union[...]`) from being rewritten — confirm
# against the Ruff version in use.
keep-runtime-typing = true

[tool.ruff.lint.per-file-ignores]
# Directory-wide exemptions from the tidy-imports rules (TID251 / TID252).
".github/*" = ["TID251"]
".vscode/*" = ["TID251"]
"src/*" = ["TID252"]
"tests/*" = ["TID251"]
"utils/*" = ["TID251"]

# Directory-wide exemptions from T201.
"examples/*.py" = ["T201"]
"performance/*.py" = ["T201"]

# Individual modules under src/unitxt.
"src/unitxt/__init__.py" = ["F811", "F401"]
"src/unitxt/api.py" = ["B904"]
"src/unitxt/blocks.py" = ["F811", "F401"]
"src/unitxt/dataclass.py" = ["C901"]
"src/unitxt/dataset.py" = ["F811", "F401"]
"src/unitxt/llm_as_judge.py" = ["F401", "C901"]
"src/unitxt/loaders.py" = ["C901"]
"src/unitxt/metric.py" = ["F811", "F401"]
"src/unitxt/metric_utils.py" = ["C901"]
"src/unitxt/metrics.py" = ["C901"]
"src/unitxt/operators.py" = ["C901"]
"src/unitxt/standard.py" = ["C901"]
"src/unitxt/tool_calling.py" = ["C901"]
"src/unitxt/type_utils.py" = ["C901"]
"src/unitxt/utils.py" = ["C901", "N818"]
"src/unitxt/validate.py" = ["B024"]

# Individual test files.
"tests/library/test_dataclass.py" = ["F811", "E731"]
"tests/library/test_loaders.py" = ["N802", "N803", "RUF015"]
"tests/library/test_retry.py" = ["B904", "N818"]

# Individual files under docs/, prepare/ and utils/.
"docs/conf.py" = ["E402"]
"prepare/cards/attaq_500.py" = ["RUF001"]
"prepare/instructions/models/llama.py" = ["RUF001"]
"utils/compare_unitxt_datasets_between_versions.py" = ["C901"]
"utils/hf/prepare_dataset.py" = ["T201"]
"utils/hf/prepare_metric.py" = ["T201"]

[tool.ruff.lint]
# Rule selectors enabled in addition to ruff's defaults.
extend-select = [
    "B",
    "C",
    "D",
    "E",
    "F",
    "G",
    "I",
    "N",
    "Q",
    "R",
    "RUF",
    "RUF100",
    "T",
    "TID25",
    "UP",
    "W",
]
# Rules switched off across the whole project.
ignore = [
    "D100",
    "D101",
    "D102",
    "D103",
    "D104",
    "D105",
    "D106",
    "D107",
    "E203",
    "E501",
    "E722",
    "G004",
    "RUF012",
]
# Every rule may be auto-fixed; none is excluded from fixing.
fixable = ["ALL"]
unfixable = []
# Names matching this pattern are treated as intentionally unused.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.format]
# Formatter output: space indentation, double quotes, automatic line endings.
indent-style = "space"
quote-style = "double"
line-ending = "auto"
# Magic trailing commas are not skipped.
skip-magic-trailing-comma = false

[tool.ruff.lint.pydocstyle]
# Docstring convention applied to the pydocstyle ("D") rules that
# [tool.ruff.lint] enables.
convention = "google"

[tool.ruff.lint.flake8-bugbear]
# Calls that flake8-bugbear should additionally treat as immutable.
extend-immutable-calls = [
    "fastapi.Depends",
    "fastapi.params.Depends",
    "fastapi.Query",
    "fastapi.params.Query",
]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
# Bans the `src` name as an import target and reports the message below.
src = { msg = "Use unitxt outside src/ and relative imports inside src/ and install unitxt from source with `pip install -e '.[dev]'`." }

[tool.codespell]
# File names are checked as well; hidden files are not.
check-filenames = true
check-hidden = false
# Glob patterns of paths that are skipped.
skip = '*cards/mt/flores101*,*cards/trec*,*cards/belebele*,*cards/amazon_mass*,*cards/reuters21578*,*cards/attaq_500*,*cards/cohere_for_ai*,*egg-info*,*/logs/*'
# Words accepted even though codespell would flag them.
ignore-words-list = 'rouge,ot,ans,nd,cann,som,tha,vie,ment,criterias,atleast,te'
# Custom pattern deciding what counts as a word.
regex = "(?<![a-z])[a-z'`]+|[A-Z][a-z'`]*|[a-z]+'[a-z]*|[a-z]+(?=[_-])|[a-z]+(?=[A-Z])|\\d+"

[tool.coverage.run]
# Measure the code under `src`, with branch coverage enabled.
source = ["src"]
branch = true

[tool.coverage.report]
# Patterns for source lines left out of the report.
# NOTE(review): coverage.py treats these as regular expressions and setting
# `exclude_lines` replaces its built-in default list — confirm this is intended.
exclude_lines = [
    "logger.",
    "pass",
    "raise ValueError",
    "except Exception",
]
# Files left out of the report.
omit = [
    "src/unitxt/assistant/*",
    "src/unitxt/blocks.py",
    "src/unitxt/dataset.py",
    "src/unitxt/inference.py",
    "src/unitxt/metric.py",
    "src/unitxt/service/*",
    "src/unitxt/test_utils/*",
    "src/unitxt/ui/*",
]

[tool.coverage.xml]
# File name used for the XML coverage report.
output = "coverage.xml"