# Browser-like request headers; `urlopener` attaches these to every opener it builds
url_default_headers = {
    "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}

def urlquote(url):
    "Percent-encode the path, params, query and fragment of `url` with `urllib.parse.quote`"
    # Sub-delims, gen-delims, `%` and `/` are left alone so URL structure and existing escapes survive
    keep = "!$&'()*+,;=" + ":?#[]@" + "%/"
    scheme,netloc,*tail = urlparse(url)
    # `tail` is (path, params, query, fragment); scheme and netloc are passed through untouched
    tail = [urllib.parse.quote(part, safe=keep) for part in tail]
    return urlunparse([scheme, netloc, *tail])

def urlwrap(url, data=None, headers=None):
    "Return `url` as-is if it is already a `Request`, else a `Request` for the `urlquote`d `url`"
    if isinstance(url, Request): return url
    hdrs = headers if headers else {}
    return Request(urlquote(url), data=data, headers=hdrs)

# Maps an HTTP status code to the exception class raised for it
ExceptionsHTTP = dict()

class HTTP4xxClientError(HTTPError):
    "Base class for client exceptions (code 4xx) from `url*` functions"

class HTTP5xxServerError(HTTPError):
    "Base class for server exceptions (code 5xx) from `url*` functions"
def urlopener():
    "Build a new `urllib` opener whose default headers are `url_default_headers`"
    opener = urllib.request.build_opener()
    opener.addheaders = [(k,v) for k,v in url_default_headers.items()]
    return opener

# install_opener(_opener)

# (status code, reason phrase) for every 4xx status that gets its own exception class
_httperrors = (
    (400,'Bad Request'),(401,'Unauthorized'),(402,'Payment Required'),(403,'Forbidden'),(404,'Not Found'),
    (405,'Method Not Allowed'),(406,'Not Acceptable'),(407,'Proxy Auth Required'),(408,'Request Timeout'),
    (409,'Conflict'),(410,'Gone'),(411,'Length Required'),(412,'Precondition Failed'),(413,'Payload Too Large'),
    (414,'URI Too Long'),(415,'Unsupported Media Type'),(416,'Range Not Satisfiable'),(417,'Expectation Failed'),
    (418,'Am A teapot'),(421,'Misdirected Request'),(422,'Unprocessable Entity'),(423,'Locked'),(424,'Failed Dependency'),
    (425,'Too Early'),(426,'Upgrade Required'),(428,'Precondition Required'),(429,'Too Many Requests'),
    (431,'Header Fields Too Large'),(451,'Legal Reasons')
)

# Create one `HTTP4xxClientError` subclass per entry, e.g. `HTTP404NotFoundError`,
# publish it as a module global, and register it in `ExceptionsHTTP` under its code
for code,msg in _httperrors:
    nm = 'HTTP' + str(code) + msg.replace(' ', '') + 'Error'
    # `msg` and `code` are bound as defaults so each class keeps its own values
    # instead of sharing the loop variables' final state
    def _init(self, url, hdrs, fp, msg=msg, code=code):
        HTTP4xxClientError.__init__(self, url, code, msg, hdrs, fp)
    cls = type(nm, (HTTP4xxClientError,), dict(__init__=_init))
    ExceptionsHTTP[code] = cls
    globals()[nm] = cls
def urlopen(url, data=None, headers=None, timeout=None, **kwargs):
    "Like `urllib.request.urlopen`, but first `urlwrap` the `url`, and encode `data`"
    # Extra keyword arguments act as the form payload when no `data` was given
    if kwargs and not data: data=kwargs
    if data is not None:
        if not isinstance(data, (str,bytes)): data = urlencode(data)
        if not isinstance(data, bytes): data = data.encode('ascii')
    try: return urlopener().open(urlwrap(url, data=data, headers=headers), timeout=timeout)
    except HTTPError as e:
        # Append the response body to the message so the caller sees the server's explanation
        e.msg += f"\n====Error Body====\n{e.read().decode(errors='ignore')}"
        raise

def urlread(url, data=None, headers=None, decode=True, return_json=False, return_headers=False, timeout=None, **kwargs):
    "Retrieve `url`, using `data` dict or `kwargs` to `POST` if present"
    try:
        with urlopen(url, data=data, headers=headers, timeout=timeout, **kwargs) as u: res,hdrs = u.read(),u.headers
    except HTTPError as e:
        # Only codes registered in `ExceptionsHTTP` have a dedicated class. Anything else
        # (5xx, or an unregistered 4xx such as 499) propagates as the original `HTTPError`
        # rather than failing with a `KeyError` on the lookup.
        exc_cls = ExceptionsHTTP.get(e.code) if 400 <= e.code < 500 else None
        if exc_cls is None: raise
        raise exc_cls(e.url, e.hdrs, e.fp, msg=e.msg) from None

    if decode: res = res.decode()
    if return_json: res = loads(res)
    # Headers are converted to a plain `dict` when returned alongside the body
    return (res,dict(hdrs)) if return_headers else res

def urljson(url, data=None, timeout=None):
    "Retrieve `url` and decode json"
    res = urlread(url, data=data, timeout=timeout)
    # An empty body decodes to an empty dict
    return json.loads(res) if res else {}

def urlcheck(url, headers=None, timeout=10):
    "`True` if `url` is empty or opens with a status below 400; `False` on `URLError`, timeout or `InvalidURL`"
    if not url: return True
    try:
        with urlopen(url, headers=headers, timeout=timeout) as u: return u.status<400
    except (URLError, socket.timeout, InvalidURL): return False

def urlclean(url):
    "Remove fragment, params, and querystring from `url` if present"
    return urlunparse(urlparse(str(url))[:3]+('','',''))

def urlretrieve(url, filename=None, reporthook=None, data=None, headers=None, timeout=None):
    "Same as `urllib.request.urlretrieve` but also works with `Request` objects"
    # Imported here because neither name is among this module's explicit top-level imports
    import tempfile
    from urllib.error import ContentTooShortError
    with contextlib.closing(urlopen(url, data, headers=headers, timeout=timeout)) as fp:
        headers = fp.info()
        if filename: tfp = open(filename, 'wb')
        else:
            # No destination given: keep the download in a temp file that outlives this call
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name

        with tfp:
            # `size` stays -1 when the server does not report a Content-Length
            bs,size,read,blocknum = 1024*8,-1,0,0
            if "content-length" in headers: size = int(headers["Content-Length"])
            if reporthook: reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if not block: break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook: reporthook(blocknum, bs, size)

    if size >= 0 and read < size:
        # As in the stdlib version, `content` carries what was retrieved so far: `(filename, headers)`
        raise ContentTooShortError(f"retrieval incomplete: got only {read} out of {size} bytes", (filename, headers))
    return filename,headers

def urldest(url, dest=None):
    "Destination path for `url`: `dest`, or `dest/name` if `dest` is an existing directory, where `name` is the last component of `url`"
    name = urlclean(Path(url).name)
    if dest is None: dest = name
    dest = Path(dest)
    return dest/name if dest.is_dir() else dest

def urlsave(url, dest=None, reporthook=None, headers=None, timeout=None):
    "Retrieve `url` and save based on its name"
    dest = urldest(url, dest)
    dest.parent.mkdir(parents=True, exist_ok=True)
    nm,msg = urlretrieve(url, dest, reporthook, headers=headers, timeout=timeout)
    return nm

def urlvalid(x):
    "Test if `x` is a valid URL"
    # Valid here means that both a scheme and a network location are present
    return all (getattrs(urlparse(str(x)), 'scheme', 'netloc'))
def urlrequest(url, verb, headers=None, route=None, query=None, data=None, json_data=True):
    "`Request` for `url` with optional route params replaced by `route`, plus `query` string, and post `data`"
    if route: url = url.format(**route)
    if query:
        # Extend an existing query string instead of adding a second `?`
        sep = '&' if '?' in url else '?'
        url += sep + urlencode(query)
    # A dict payload is serialized as JSON, or form-encoded when `json_data` is false;
    # any other `data` is passed through untouched
    if isinstance(data,dict): data = (json.dumps if json_data else urlencode)(data).encode('ascii')
    return Request(url, headers=headers or {}, data=data or None, method=verb.upper())

@patch
def summary(self:Request, skip=None)->dict:
    "Summary containing full_url, headers, method, and data, removing `skip` from headers"
    # Normalize `skip` once rather than once per header
    skipped = listify(skip)
    res = L('full_url','method','data').map_dict(partial(getattr,self))
    res['headers'] = {k:v for k,v in self.headers.items() if k not in skipped}
    return res
def urlsend(url, verb, headers=None, route=None, query=None, data=None, json_data=True,
            return_json=True, return_headers=False, debug=None, timeout=None):
    "Send request with `urlrequest`, converting result to json if `return_json`"
    req = urlrequest(url, verb, headers, route=route, query=query, data=data, json_data=json_data)
    # `debug`, when given, is called with the prepared request before it is sent
    if debug: debug(req)

    # A route carrying `archive_format` is returned as raw bytes: no decoding, no json
    if route and route.get('archive_format', None):
        return urlread(req, decode=False, return_json=False, return_headers=return_headers, timeout=timeout)

    return urlread(req, return_json=return_json, return_headers=return_headers, timeout=timeout)

def do_request(url, post=False, headers=None, **data):
    "Call GET or json-encoded POST on `url`, depending on `post`"
    if data:
        if post: data = json.dumps(data).encode('ascii')
        else:
            # GET: move the parameters into the query string, extending one if already present
            url += ('&' if '?' in url else '?') + urlencode(data)
            data = None
    # `Request` iterates over `headers`, so `None` must be replaced by an empty dict
    return urljson(Request(url, headers=headers or {}, data=data or None))

def _socket_det(port,host,dgram):
    "Socket family, address and type for `port`: an int means TCP/IP, anything else a Unix socket path"
    if isinstance(port,int): family,addr = socket.AF_INET,(host or socket.gethostname(),port)
    else: family,addr = socket.AF_UNIX,port
    return family,addr,(socket.SOCK_STREAM,socket.SOCK_DGRAM)[dgram]

def start_server(port, host=None, dgram=False, reuse_addr=True, n_queue=None):
    "Create a `socket` server on `port`, with optional `host`, of type `dgram`"
    listen_args = [n_queue] if n_queue else []
    family,addr,typ = _socket_det(port,host,dgram)
    if family==socket.AF_UNIX:
        # Remove a stale socket file left behind by a previous server
        if os.path.exists(addr): os.unlink(addr)
        assert not os.path.exists(addr), f"{addr} in use"
    s = socket.socket(family, typ)
    try:
        if reuse_addr and family==socket.AF_INET: s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(addr)
        # Datagram sockets are connectionless; `listen` only applies to stream sockets
        if not dgram: s.listen(*listen_args)
    except BaseException:
        # Do not leak the descriptor when setup fails
        s.close()
        raise
    return s

def start_client(port, host=None, dgram=False):
    "Create a `socket` client on `port`, with optional `host`, of type `dgram`"
    family,addr,typ = _socket_det(port,host,dgram)
    s = socket.socket(family, typ)
    try: s.connect(addr)
    except BaseException:
        # Do not leak the descriptor when the connection fails
        s.close()
        raise
    return s

def tobytes(s:str)->bytes:
    "Convert `s` into HTTP-ready bytes format"
    return s.replace('\n', '\r\n').encode('utf-8')

def http_response(body=None, status=200, hdrs=None, **kwargs):
    "Create an HTTP-ready response, adding `kwargs` to `hdrs`"
    # Keyword names use `_` where the header name has `-`, e.g. `User_Agent` -> `User-Agent`
    kwargs = {k.replace('_','-'):v for k,v in kwargs.items()}
    hdrs = {**(hdrs or {}), **kwargs}
    # Encode the body first so that Content-Length counts the bytes actually sent.
    # `None` means no body, and bytes are passed through verbatim.
    if body is None: payload = b''
    elif isinstance(body, (bytes,bytearray)): payload = bytes(body)
    else: payload = str(body).encode('utf-8')
    # Use the standard reason phrase for `status`; unknown codes get an empty phrase
    try: reason = http.HTTPStatus(int(status)).phrase
    except (ValueError, TypeError): reason = ''
    if payload: hdrs['Content-Length'] = len(payload)
    head = f"HTTP/1.1 {status} {reason}\n" + ''.join(f"{k}: {v}\n" for k,v in hdrs.items()) + "\n"
    # Only the status line and headers get CRLF translation; the body is left untouched
    return tobytes(head) + payload

@threaded
def recv_once(host:str='localhost', port:int=8000):
    "Spawn a thread to receive a single HTTP request, echo it back as the response body, and return the request bytes"
    # Both sockets are closed on exit, even if `recv` or `sendall` fails
    with start_server(port,host) as srv:
        conn,addr = srv.accept()
        with conn:
            res = conn.recv(1024)
            conn.sendall(http_response(res))
    return res