# Kubeflow Pipelines component: loads a trained XGBoost model, runs it on CSV
# feature data and writes the predictions to a text file.
name: Xgboost predict
description: |-
  Make predictions using a trained XGBoost model.

  Args:
      data_path: Path for the feature data in CSV format.
      model_path: Path for the trained model in binary XGBoost format.
      predictions_path: Output path for the predictions.
      label_column: Column containing the label data.

  Annotations:
      author: Alexey Volkov
inputs:
- {name: data, type: CSV}
- {name: model, type: XGBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions, type: Text}
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    # Install pinned dependencies (retrying with --user if the first attempt
    # fails), then exec the remaining command-line ("$0" "$@").
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m
      pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    # Inline Python program; the entries under `args` become its argv.
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          '''Create the parent directory of file_path (if any) and return file_path.

          Used as the argparse ``type`` for output paths so that the directory
          exists before the program writes to it.
          '''
          import os
          parent_dir = os.path.dirname(file_path)
          # A bare file name has an empty directory part, and os.makedirs('')
          # raises FileNotFoundError, so only create a directory when one is named.
          if parent_dir:
              os.makedirs(parent_dir, exist_ok=True)
          return file_path

      def xgboost_predict(
          data_path,  # Parsed with pandas.read_csv
          model_path,
          predictions_path,
          label_column = None,
      ):
          '''Make predictions using a trained XGBoost model.

          Args:
              data_path: Path for the feature data in CSV format.
              model_path: Path for the trained model in binary XGBoost format.
              predictions_path: Output path for the predictions.
              label_column: Optional integer position of the column containing the
                  label data. When given, that column is dropped from the data
                  before predicting.

          Annotations:
              author: Alexey Volkov
          '''
          from pathlib import Path

          import numpy
          import pandas
          import xgboost

          df = pandas.read_csv(
              data_path,
          )

          # The label is not a feature: remove it so that only feature columns
          # are passed to the model.
          if label_column is not None:
              df = df.drop(columns=[df.columns[label_column]])

          testing_data = xgboost.DMatrix(
              data=df,
          )

          model = xgboost.Booster(model_file=model_path)

          predictions = model.predict(testing_data)

          # Make sure the output directory exists, then write the predictions
          # as text with numpy.savetxt.
          Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
          numpy.savetxt(predictions_path, predictions)

      import argparse
      _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in CSV format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column: Column containing the label data.\n\n Annotations:\n author: Alexey Volkov ')
      # default=argparse.SUPPRESS keeps absent options out of the parsed
      # namespace, so xgboost_predict falls back to its own defaults.
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = xgboost_predict(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    # --label-column is passed only when the optional input is supplied.
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - --predictions
    - {outputPath: predictions}