{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sharing pretrained models (PyTorch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets, and Evaluate libraries, as well as Git LFS, to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel; -y keeps apt non-interactive.\n",
    "%pip install datasets evaluate transformers[sentencepiece]\n",
    "!apt install -y git-lfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will need to set up git: adapt your email and name in the following cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git config --global user.email \"you@example.com\"\n",
    "!git config --global user.name \"Your Name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pushing to the Hub from training arguments\n",
    "\n",
    "The `TrainingArguments` below set `save_strategy=\"epoch\"` and `push_to_hub=True`, with `bert-finetuned-mrpc` as the output directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import TrainingArguments\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    \"bert-finetuned-mrpc\", save_strategy=\"epoch\", push_to_hub=True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the `push_to_hub` API\n",
    "\n",
    "Load a model and its tokenizer, then push each of them to a `dummy-model` repository."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = AutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target an organization by prefixing the repo id with its namespace\n",
    "# (this replaces the deprecated `organization=` keyword).\n",
    "# Requires write access to that organization.\n",
    "tokenizer.push_to_hub(\"huggingface/dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace <TOKEN> with a Hub access token; `token` supersedes the deprecated `use_auth_token`.\n",
    "# Do not commit a real token: read it from the environment or rely on `notebook_login()`.\n",
    "tokenizer.push_to_hub(\"huggingface/dummy-model\", token=\"<TOKEN>\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the `huggingface_hub` Python library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import (\n",
    "    # User management\n",
    "    login,\n",
    "    logout,\n",
    "    whoami,\n",
    "\n",
    "    # Repository creation and management\n",
    "    create_repo,\n",
    "    delete_repo,\n",
    "    update_repo_visibility,\n",
    "\n",
    "    # And some methods to retrieve/change information about the content\n",
    "    list_models,\n",
    "    list_datasets,\n",
    "    list_metrics,\n",
    "    list_repo_files,\n",
    "    upload_file,\n",
    "    delete_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "# exist_ok=True keeps this cell re-runnable: `dummy-model` was already pushed above.\n",
    "create_repo(\"dummy-model\", exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "# The organization is given as the namespace part of the repo id;\n",
    "# `create_repo` no longer accepts an `organization=` keyword.\n",
    "create_repo(\"huggingface/dummy-model\", exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import upload_file\n",
    "\n",
    "# Replace <path_to_file> with the local folder holding config.json and\n",
    "# <namespace> with your username or organization.\n",
    "upload_file(\n",
    "    path_or_fileobj=\"<path_to_file>/config.json\",\n",
    "    path_in_repo=\"config.json\",\n",
    "    repo_id=\"<namespace>/dummy-model\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the `Repository` class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import Repository\n",
    "\n",
    "# Clone <namespace>/dummy-model into the local folder <path_to_dummy_folder>\n",
    "# (replace both placeholders).\n",
    "repo = Repository(\"<path_to_dummy_folder>\", clone_from=\"<namespace>/dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overview of the git-style helpers called on `repo`.\n",
    "# NOTE(review): illustrative listing - `git_commit()` with nothing staged and `git_tag()`\n",
    "# without arguments presumably fail when run verbatim; confirm before executing this cell.\n",
    "repo.git_pull()\n",
    "repo.git_add()\n",
    "repo.git_commit()\n",
    "repo.git_push()\n",
    "repo.git_tag()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "repo.git_pull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save into the local clone: use the same folder that was passed to `Repository`.\n",
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "repo.git_add()\n",
    "repo.git_commit(\"Add model and tokenizer files\")\n",
    "repo.git_push()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = AutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
    "\n",
    "# Do whatever with the model, train it, fine-tune it...\n",
    "\n",
    "# Replace <path_to_dummy_folder> with the local folder of the cloned repository.\n",
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Sharing pretrained models (PyTorch)",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}