# Component definition: reads an Apache Arrow Feather file and writes the same
# table back out as an Apache Parquet file.
name: Convert apache arrow feather to apache parquet
description: |-
  Converts Apache Arrow Feather to Apache Parquet.

  [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov
inputs:
- {name: data, type: ApacheArrowFeather}
outputs:
- {name: output_data, type: ApacheParquet}
implementation:
  container:
    image: python:3.7
    command:
    # Shell wrapper: install the pinned pyarrow (retrying with --user if the
    # first install fails), then run the remaining command entries, which
    # sh receives as "$0" (python3) and "$@" (-u -c <program> <args>).
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          # argparse "type" callback for output paths: make sure the parent
          # directory exists, then hand the path back unchanged.
          import os
          parent_dir = os.path.dirname(file_path)
          # A bare file name has no directory part and os.makedirs('') raises
          # FileNotFoundError, so only create the parent when there is one.
          if parent_dir:
              os.makedirs(parent_dir, exist_ok=True)
          return file_path

      def convert_apache_arrow_feather_to_apache_parquet(
          data_path,
          output_data_path,
      ):
          '''Converts Apache Arrow Feather to Apache Parquet.

          [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov
          '''
          from pyarrow import feather, parquet

          table = feather.read_table(data_path)
          parquet.write_table(table, output_data_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert apache arrow feather to apache parquet', description='Converts Apache Arrow Feather to Apache Parquet.\n\n [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov ')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())
      # No argument with dest "_output_paths" is declared above, so this is
      # always the empty default and the serialization loop below never runs.
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = convert_apache_arrow_feather_to_apache_parquet(**_parsed_args)

      _output_serializers = [

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}