{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sharing pretrained models (PyTorch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets, and Evaluate libraries to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install datasets evaluate transformers[sentencepiece]\n",
    "!apt install git-lfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will need to setup git, adapt your email and name in the following cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git config --global user.email \"you@example.com\"\n",
    "!git config --global user.name \"Your Name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import TrainingArguments\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    \"bert-finetuned-mrpc\", save_strategy=\"epoch\", push_to_hub=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = AutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\", organization=\"huggingface\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\", organization=\"huggingface\", use_auth_token=\"<TOKEN>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import (\n",
    "    # User management\n",
    "    login,\n",
    "    logout,\n",
    "    whoami,\n",
    "\n",
    "    # Repository creation and management\n",
    "    create_repo,\n",
    "    delete_repo,\n",
    "    update_repo_visibility,\n",
    "\n",
    "    # And some methods to retrieve/change information about the content\n",
    "    list_models,\n",
    "    list_datasets,\n",
    "    list_metrics,\n",
    "    list_repo_files,\n",
    "    upload_file,\n",
    "    delete_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "create_repo(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "create_repo(\"dummy-model\", organization=\"huggingface\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import upload_file\n",
    "\n",
    "upload_file(\n",
    "    \"<path_to_file>/config.json\",\n",
    "    path_in_repo=\"config.json\",\n",
    "    repo_id=\"<namespace>/dummy-model\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import Repository\n",
    "\n",
    "repo = Repository(\"<path_to_dummy_folder>\", clone_from=\"<namespace>/dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "repo.git_pull()\n",
    "repo.git_add()\n",
    "repo.git_commit()\n",
    "repo.git_push()\n",
    "repo.git_tag()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "repo.git_pull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "repo.git_add()\n",
    "repo.git_commit(\"Add model and tokenizer files\")\n",
    "repo.git_push()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = AutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
    "\n",
    "# Do whatever with the model, train it, fine-tune it...\n",
    "\n",
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Sharing pretrained models (PyTorch)",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}