{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#default_exp net" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from fastcore.imports import *\n", "from fastcore.foundation import *\n", "from fastcore.basics import *\n", "from fastcore.xtras import *\n", "from fastcore.parallel import *\n", "from functools import wraps\n", "\n", "import json,urllib,contextlib\n", "import socket,urllib.request,http,urllib\n", "from contextlib import contextmanager,ExitStack\n", "from urllib.request import Request,urlretrieve,install_opener\n", "from urllib.error import HTTPError,URLError\n", "from urllib.parse import urlencode,urlparse,urlunparse\n", "from http.client import InvalidURL" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastcore.test import *\n", "from nbdev.showdoc import *\n", "from fastcore.nb_imports import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Network functionality\n", "\n", "> Network, HTTP, and URL functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "url_default_headers = {\n", " \"Accept\":\n", " \"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9\",\n", " \"Accept-Language\": \"en-US,en;q=0.9\",\n", " \"Cache-Control\": \"max-age=0\",\n", " \"Sec-Fetch-Dest\": \"document\",\n", " \"Sec-Fetch-Mode\": \"navigate\",\n", " \"Sec-Fetch-Site\": \"none\",\n", " \"Sec-Fetch-User\": \"?1\",\n", " \"Upgrade-Insecure-Requests\": \"1\",\n", " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36\"\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlquote(url):\n", " \"Update url's path with `urllib.parse.quote`\"\n", " subdelims = \"!$&'()*+,;=\"\n", " gendelims = \":?#[]@\"\n", " safe = subdelims+gendelims+\"%/\"\n", " p = list(urlparse(url))\n", " p[2] = urllib.parse.quote(p[2], safe=safe)\n", " for i in range(3,6): p[i] = urllib.parse.quote(p[i], safe=safe)\n", " return urlunparse(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://github.com/fastai/fastai/compare/master@%7B1.day.ago%7D%E2%80%A6master'" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "urlquote(\"https://github.com/fastai/fastai/compare/master@{1.day.ago}…master\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://www.google.com/search?q=%E4%BD%A0%E5%A5%BD'" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "urlquote(\"https://www.google.com/search?q=你好\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlwrap(url, data=None, headers=None):\n", " \"Wrap `url` in a urllib `Request` with `urlquote`\"\n", " return url if isinstance(url,Request) else Request(urlquote(url), data=data, headers=headers or {})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "ExceptionsHTTP = {}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class HTTP4xxClientError(HTTPError):\n", " \"Base class for client exceptions (code 4xx) from `url*` functions\"\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class HTTP5xxServerError(HTTPError):\n", " \"Base class for server exceptions (code 5xx) from `url*` functions\"\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

class HTTP4xxClientError[source]

\n", "\n", "> HTTP4xxClientError(**`url`**, **`code`**, **`msg`**, **`hdrs`**, **`fp`**) :: `HTTPError`\n", "\n", "Base class for client exceptions (code 4xx) from `url*` functions" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(HTTP4xxClientError, title_level=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

class HTTP5xxServerError[source]

\n", "\n", "> HTTP5xxServerError(**`url`**, **`code`**, **`msg`**, **`hdrs`**, **`fp`**) :: `HTTPError`\n", "\n", "Base class for server exceptions (code 5xx) from `url*` functions" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(HTTP5xxServerError, title_level=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_opener = urllib.request.build_opener()\n", "_opener.addheaders = list(url_default_headers.items())\n", "install_opener(_opener)\n", "\n", "_httperrors = (\n", " (400,'Bad Request'),(401,'Unauthorized'),(402,'Payment Required'),(403,'Forbidden'),(404,'Not Found'),\n", " (405,'Method Not Allowed'),(406,'Not Acceptable'),(407,'Proxy Auth Required'),(408,'Request Timeout'),\n", " (409,'Conflict'),(410,'Gone'),(411,'Length Required'),(412,'Precondition Failed'),(413,'Payload Too Large'),\n", " (414,'URI Too Long'),(415,'Unsupported Media Type'),(416,'Range Not Satisfiable'),(417,'Expectation Failed'),\n", " (418,'Am A teapot'),(421,'Misdirected Request'),(422,'Unprocessable Entity'),(423,'Locked'),(424,'Failed Dependency'),\n", " (425,'Too Early'),(426,'Upgrade Required'),(428,'Precondition Required'),(429,'Too Many Requests'),\n", " (431,'Header Fields Too Large'),(451,'Legal Reasons')\n", ")\n", "\n", "for code,msg in _httperrors:\n", " nm = f'HTTP{code}{msg.replace(\" \",\"\")}Error'\n", " cls = get_class(nm, 'url', 'hdrs', 'fp', sup=HTTP4xxClientError, msg=msg, code=code)\n", " globals()[nm] = ExceptionsHTTP[code] = cls" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_all_ = ['HTTP400BadRequestError', 'HTTP401UnauthorizedError', 'HTTP402PaymentRequiredError', 'HTTP403ForbiddenError', 'HTTP404NotFoundError', 'HTTP405MethodNotAllowedError', 'HTTP406NotAcceptableError', 'HTTP407ProxyAuthRequiredError', 'HTTP408RequestTimeoutError', 'HTTP409ConflictError', 'HTTP410GoneError', 'HTTP411LengthRequiredError', 'HTTP412PreconditionFailedError', 'HTTP413PayloadTooLargeError', 'HTTP414URITooLongError', 'HTTP415UnsupportedMediaTypeError', 'HTTP416RangeNotSatisfiableError', 'HTTP417ExpectationFailedError', 'HTTP418AmAteapotError', 'HTTP421MisdirectedRequestError', 'HTTP422UnprocessableEntityError', 'HTTP423LockedError', 'HTTP424FailedDependencyError', 'HTTP425TooEarlyError', 'HTTP426UpgradeRequiredError', 'HTTP428PreconditionRequiredError', 'HTTP429TooManyRequestsError', 'HTTP431HeaderFieldsTooLargeError', 'HTTP451LegalReasonsError']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlopen(url, data=None, headers=None, timeout=None, **kwargs):\n", " \"Like `urllib.request.urlopen`, but first `urlwrap` the `url`, and encode `data`\"\n", " if kwargs and not data: data=kwargs\n", " if data is not None:\n", " if not isinstance(data, (str,bytes)): data = urlencode(data)\n", " if not isinstance(data, bytes): data = data.encode('ascii')\n", " return _opener.open(urlwrap(url, data=data, headers=headers), timeout=timeout)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlread(url, data=None, headers=None, decode=True, return_json=False, return_headers=False, timeout=None, **kwargs):\n", " \"Retrieve `url`, using `data` dict or `kwargs` to `POST` if present\"\n", " try:\n", " with urlopen(url, data=data, headers=headers, timeout=timeout, **kwargs) as u: res,hdrs = u.read(),u.headers\n", " except HTTPError as e:\n", " if 400 <= e.code < 500: raise ExceptionsHTTP[e.code](e.url, e.hdrs, e.fp) from None\n", " else: raise\n", "\n", " if decode: res = res.decode()\n", " if return_json: res = loads(res)\n", " return (res,dict(hdrs)) if return_headers else res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urljson(url, data=None, timeout=None):\n", " \"Retrieve `url` and decode json\"\n", " res = urlread(url, data=data, timeout=timeout)\n", " return json.loads(res) if res else {}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(urljson('https://httpbin.org/get')['headers']['User-Agent'], url_default_headers['User-Agent'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlcheck(url, timeout=10):\n", " if not url: return True\n", " try:\n", " with urlopen(url, timeout=timeout) as u: return u.status<400\n", " except URLError: return False\n", " except socket.timeout: return False\n", " except InvalidURL: return False" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlclean(url):\n", " \"Remove fragment, params, and querystring from `url` if present\"\n", " return urlunparse(urlparse(str(url))[:3]+('','',''))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(urlclean('http://a.com/b?c=1#d'), 'http://a.com/b')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlretrieve(url, filename=None, reporthook=None, data=None, timeout=None):\n", " \"Same as `urllib.request.urlretrieve` but also works with `Request` objects\"\n", " with contextlib.closing(urlopen(url, data, timeout=timeout)) as fp:\n", " headers = fp.info()\n", " if filename: tfp = open(filename, 'wb')\n", " else:\n", " tfp = tempfile.NamedTemporaryFile(delete=False)\n", " filename = tfp.name\n", "\n", " with tfp:\n", " bs,size,read,blocknum = 1024*8,-1,0,0\n", " if \"content-length\" in headers: size = int(headers[\"Content-Length\"])\n", " if reporthook: reporthook(blocknum, bs, size)\n", " while True:\n", " block = fp.read(bs)\n", " if not block: break\n", " read += len(block)\n", " tfp.write(block)\n", " blocknum += 1\n", " if reporthook: reporthook(blocknum, bs, size)\n", "\n", " if size >= 0 and read < size:\n", " raise ContentTooShortError(f\"retrieval incomplete: got only {read} out of {size} bytes\", headers)\n", " return filename,headers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urldest(url, dest=None):\n", " name = urlclean(Path(url).name)\n", " if dest is None: dest = name\n", " dest = Path(dest)\n", " return dest/name if dest.is_dir() else dest" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlsave(url, dest=None, reporthook=None, timeout=None):\n", " \"Retrieve `url` and save based on its name\"\n", " dest = urldest(url, dest)\n", " dest.parent.mkdir(parents=True, exist_ok=True)\n", " nm,msg = urlretrieve(url, dest, reporthook, timeout=timeout)\n", " return nm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#skip\n", "with tempfile.TemporaryDirectory() as d: urlsave('http://www.google.com/index.html', d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlvalid(x):\n", " \"Test if `x` is a valid URL\"\n", " return all (getattrs(urlparse(str(x)), 'scheme', 'netloc'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "assert urlvalid('http://www.google.com/')\n", "assert not urlvalid('www.google.com/')\n", "assert not urlvalid(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlrequest(url, verb, headers=None, route=None, query=None, data=None, json_data=True):\n", " \"`Request` for `url` with optional route params replaced by `route`, plus `query` string, and post `data`\"\n", " if route: url = url.format(**route)\n", " if query: url += '?' + urlencode(query)\n", " if isinstance(data,dict): data = (json.dumps if json_data else urlencode)(data).encode('ascii')\n", " return Request(url, headers=headers or {}, data=data or None, method=verb.upper())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "hdr = {'Hdr1':'1', 'Hdr2':'2'}\n", "req = urlrequest('http://example.com/{foo}/1', 'POST',\n", " headers=hdr, route={'foo':'3'}, query={'q':'4'}, data={'d':'5'})\n", "\n", "test_eq(req.headers, hdr)\n", "test_eq(req.full_url, 'http://example.com/3/1?q=4')\n", "test_eq(req.method, 'POST')\n", "test_eq(req.data, b'{\"d\": \"5\"}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "req = urlrequest('http://example.com/{foo}/1', 'POST', data={'d':'5','e':'6'}, headers=hdr, json_data=False)\n", "test_eq(req.data, b'd=5&e=6')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "@patch\n", "def summary(self:Request, skip=None)->dict:\n", " \"Summary containing full_url, headers, method, and data, removing `skip` from headers\"\n", " res = L('full_url','method','data').map_dict(partial(getattr,self))\n", " res['headers'] = {k:v for k,v in self.headers.items() if k not in listify(skip)}\n", " return res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'full_url': 'http://example.com/{foo}/1',\n", " 'method': 'POST',\n", " 'data': b'd=5&e=6',\n", " 'headers': {'Hdr2': '2'}}" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "req.summary(skip='Hdr1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def urlsend(url, verb, headers=None, route=None, query=None, data=None, json_data=True,\n", " return_json=True, return_headers=False, debug=None):\n", " \"Send request with `urlrequest`, converting result to json if `return_json`\"\n", " req = urlrequest(url, verb, headers, route=route, query=query, data=data, json_data=json_data)\n", " if debug: debug(req)\n", "\n", " if route and route.get('archive_format', None):\n", " return urlread(req, decode=False, return_json=False, return_headers=return_headers)\n", "\n", " return urlread(req, return_json=return_json, return_headers=return_headers)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Test [ghapi.actions.download_artifact](https://docs.github.com/rest/reference/actions#download-an-artifact)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "url = \"https://github.com/fastai/ghapi/archive/refs/tags/0.1.15.zip\"\n", "header={'Accept': 'application/vnd.github.v3+json'}\n", "route={'artifact_id': 1234, 'archive_format': 'zip'} \n", "\n", "res = urlsend(url, \"get\", headers=header, route=route)\n", "assert type(res) is bytes\n", "\n", "def _org_urlsend(url, verb, headers=None, route=None, query=None, data=None, json_data=True,\n", " return_json=True, return_headers=False, debug=None):\n", " req = urlrequest(url, verb, headers, route=route, query=query, data=data, json_data=json_data)\n", " if debug: debug(req)\n", " return urlread(req, return_json=return_json, return_headers=return_headers)\n", "\n", "test_fail(_org_urlsend, args=dict(url=url, verb=\"get\", headers=header, route=route), msg=\"'utf-8' codec can't decode\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def do_request(url, post=False, headers=None, **data):\n", " \"Call GET or json-encoded POST on `url`, depending on `post`\"\n", " if data:\n", " if post: data = json.dumps(data).encode('ascii')\n", " else:\n", " url += \"?\" + urlencode(data)\n", " data = None\n", " return urljson(Request(url, headers=headers, data=data or None))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def _socket_det(port,host,dgram):\n", " if isinstance(port,int): family,addr = socket.AF_INET,(host or socket.gethostname(),port)\n", " else: family,addr = socket.AF_UNIX,port\n", " return family,addr,(socket.SOCK_STREAM,socket.SOCK_DGRAM)[dgram]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def start_server(port, host=None, dgram=False, reuse_addr=True, n_queue=None):\n", " \"Create a `socket` server on `port`, with optional `host`, of type `dgram`\"\n", " listen_args = [n_queue] if n_queue else []\n", " family,addr,typ = _socket_det(port,host,dgram)\n", " if family==socket.AF_UNIX:\n", " if os.path.exists(addr): os.unlink(addr)\n", " assert not os.path.exists(addr), f\"{addr} in use\"\n", " s = socket.socket(family, typ)\n", " if reuse_addr and family==socket.AF_INET: s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n", " s.bind(addr)\n", " s.listen(*listen_args)\n", " return s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can create a TCP client and server pass an int as `port` and optional `host`. `host` defaults to your main network interface if not provided. You can create a Unix socket client and server by passing a string to `port`. A `SOCK_STREAM` socket is created by default, unless you pass `dgram=True`, in which case a `SOCK_DGRAM` socket is created. `n_queue` sets the listening queue size." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def start_client(port, host=None, dgram=False):\n", " \"Create a `socket` client on `port`, with optional `host`, of type `dgram`\"\n", " family,addr,typ = _socket_det(port,host,dgram)\n", " s = socket.socket(family, typ)\n", " s.connect(addr)\n", " return s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Export -" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Converted 00_test.ipynb.\n", "Converted 01_basics.ipynb.\n", "Converted 02_foundation.ipynb.\n", "Converted 03_xtras.ipynb.\n", "Converted 03a_parallel.ipynb.\n", "Converted 03b_net.ipynb.\n", "Converted 04_dispatch.ipynb.\n", "Converted 05_transform.ipynb.\n", "Converted 06_docments.ipynb.\n", "Converted 07_meta.ipynb.\n", "Converted 08_script.ipynb.\n", "Converted index.ipynb.\n", "Converted parallel_win.ipynb.\n" ] } ], "source": [ "#hide\n", "from nbdev.export import notebook2script\n", "notebook2script()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "jupytext": { "split_at_heading": true }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 4 }