{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset split sizes\n",
    "\n",
    "Loads the train / validation / test splits with `dataset.load_data`, using the TSV paths from the Hydra `experiment.yaml` config, and reports how many rows each split contains and how many of those rows are ellipses versus controls."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Extend the import path before the imports below so that `dataset` can be found\n",
    "# (presumably it lives in ../scripts - verify against the repository layout).\n",
    "sys.path.append('../scripts')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from hydra import compose, initialize\n",
    "from hydra.core.global_hydra import GlobalHydra\n",
    "\n",
    "from dataset import load_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear any Hydra state left in the kernel so this cell can be re-run;\n",
    "# `initialize` raises if Hydra is already initialized.\n",
    "GlobalHydra.instance().clear()\n",
    "\n",
    "# `config_path` is documented as a string, so pass a plain str rather than a Path.\n",
    "initialize(config_path='..', job_name='foo', version_base='1.1')\n",
    "config = compose(config_name='experiment.yaml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The TSV paths in the config are joined onto the parent directory.\n",
    "base_path = Path('..')\n",
    "train_df, val_df, test_df = load_data(\n",
    "    base_path / config.data.cnf_tsv_path,\n",
    "    base_path / config.data.controls_tsv_path,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the three splits so overall totals can be shown next to the per-split counts.\n",
    "all_df = pd.concat([train_df, val_df, test_df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5336, 1201, 1159, 7696)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows per split: train, val, test, all\n",
    "len(train_df), len(val_df), len(test_df), len(all_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2715, 590, 543, 3848)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ellipses (rows where `controls` is not set): train, val, test, all\n",
    "(~train_df.controls).sum(), (~val_df.controls).sum(), (~test_df.controls).sum(), (~all_df.controls).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2621, 611, 616, 3848)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Controls (rows where `controls` is set): train, val, test, all\n",
    "train_df.controls.sum(), val_df.controls.sum(), test_df.controls.sum(), all_df.controls.sum()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:ellipses]",
   "language": "python",
   "name": "conda-env-ellipses-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "8f7616dd95153615ba76d82383ee4b763d06514f4c395e85d9efff1c9a575639"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}