01:
\n",
"
+02: from math import pi, acos, cos, sin
\n",
"
__pyx_t_1 = PyList_New(4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_INCREF(__pyx_n_s_pi);\n",
" __Pyx_GIVEREF(__pyx_n_s_pi);\n",
" PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_pi);\n",
" __Pyx_INCREF(__pyx_n_s_acos);\n",
" __Pyx_GIVEREF(__pyx_n_s_acos);\n",
" PyList_SET_ITEM(__pyx_t_1, 1, __pyx_n_s_acos);\n",
" __Pyx_INCREF(__pyx_n_s_cos);\n",
" __Pyx_GIVEREF(__pyx_n_s_cos);\n",
" PyList_SET_ITEM(__pyx_t_1, 2, __pyx_n_s_cos);\n",
" __Pyx_INCREF(__pyx_n_s_sin);\n",
" __Pyx_GIVEREF(__pyx_n_s_sin);\n",
" PyList_SET_ITEM(__pyx_t_1, 3, __pyx_n_s_sin);\n",
" __pyx_t_2 = __Pyx_Import(__pyx_n_s_math, __pyx_t_1, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_pi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_pi, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_acos); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_acos, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_cos); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_cos, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_sin); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_sin, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
"
03:
\n",
"
04:
\n",
"
+05: def great_circle(lon1, lat1, lon2, lat2):
\n",
"
/* Python wrapper */\n",
"static PyObject *__pyx_pw_46_cython_magic_510139e97843e1ad4066ec2ca94da783_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/\n",
"static PyMethodDef __pyx_mdef_46_cython_magic_510139e97843e1ad4066ec2ca94da783_1great_circle = {\"great_circle\", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_510139e97843e1ad4066ec2ca94da783_1great_circle, METH_VARARGS|METH_KEYWORDS, 0};\n",
"static PyObject *__pyx_pw_46_cython_magic_510139e97843e1ad4066ec2ca94da783_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {\n",
" PyObject *__pyx_v_lon1 = 0;\n",
" PyObject *__pyx_v_lat1 = 0;\n",
" PyObject *__pyx_v_lon2 = 0;\n",
" PyObject *__pyx_v_lat2 = 0;\n",
" PyObject *__pyx_r = 0;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle (wrapper)\", 0);\n",
" {\n",
" static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_lon1,&__pyx_n_s_lat1,&__pyx_n_s_lon2,&__pyx_n_s_lat2,0};\n",
" PyObject* values[4] = {0,0,0,0};\n",
" if (unlikely(__pyx_kwds)) {\n",
" Py_ssize_t kw_args;\n",
" const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);\n",
" switch (pos_args) {\n",
" case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" CYTHON_FALLTHROUGH;\n",
" case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" CYTHON_FALLTHROUGH;\n",
" case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" CYTHON_FALLTHROUGH;\n",
" case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" CYTHON_FALLTHROUGH;\n",
" case 0: break;\n",
" default: goto __pyx_L5_argtuple_error;\n",
" }\n",
" kw_args = PyDict_Size(__pyx_kwds);\n",
" switch (pos_args) {\n",
" case 0:\n",
" if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon1)) != 0)) kw_args--;\n",
" else goto __pyx_L5_argtuple_error;\n",
" CYTHON_FALLTHROUGH;\n",
" case 1:\n",
" if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat1)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 1); __PYX_ERR(0, 5, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 2:\n",
" if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 2); __PYX_ERR(0, 5, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 3:\n",
" if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 3); __PYX_ERR(0, 5, __pyx_L3_error)\n",
" }\n",
" }\n",
" if (unlikely(kw_args > 0)) {\n",
" if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, \"great_circle\") < 0)) __PYX_ERR(0, 5, __pyx_L3_error)\n",
" }\n",
" } else if (PyTuple_GET_SIZE(__pyx_args) != 4) {\n",
" goto __pyx_L5_argtuple_error;\n",
" } else {\n",
" values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" }\n",
" __pyx_v_lon1 = values[0];\n",
" __pyx_v_lat1 = values[1];\n",
" __pyx_v_lon2 = values[2];\n",
" __pyx_v_lat2 = values[3];\n",
" }\n",
" goto __pyx_L4_argument_unpacking_done;\n",
" __pyx_L5_argtuple_error:;\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 5, __pyx_L3_error)\n",
" __pyx_L3_error:;\n",
" __Pyx_AddTraceback(\"_cython_magic_510139e97843e1ad4066ec2ca94da783.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __Pyx_RefNannyFinishContext();\n",
" return NULL;\n",
" __pyx_L4_argument_unpacking_done:;\n",
" __pyx_r = __pyx_pf_46_cython_magic_510139e97843e1ad4066ec2ca94da783_great_circle(__pyx_self, __pyx_v_lon1, __pyx_v_lat1, __pyx_v_lon2, __pyx_v_lat2);\n",
"\n",
" /* function exit code */\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"static PyObject *__pyx_pf_46_cython_magic_510139e97843e1ad4066ec2ca94da783_great_circle(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_lon1, PyObject *__pyx_v_lat1, PyObject *__pyx_v_lon2, PyObject *__pyx_v_lat2) {\n",
" PyObject *__pyx_v_radius = NULL;\n",
" PyObject *__pyx_v_x = NULL;\n",
" PyObject *__pyx_v_a = NULL;\n",
" PyObject *__pyx_v_b = NULL;\n",
" PyObject *__pyx_v_theta = NULL;\n",
" PyObject *__pyx_v_c = NULL;\n",
" PyObject *__pyx_r = NULL;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle\", 0);\n",
"/* … */\n",
" /* function exit code */\n",
" __pyx_L1_error:;\n",
" __Pyx_XDECREF(__pyx_t_1);\n",
" __Pyx_XDECREF(__pyx_t_2);\n",
" __Pyx_XDECREF(__pyx_t_3);\n",
" __Pyx_XDECREF(__pyx_t_4);\n",
" __Pyx_XDECREF(__pyx_t_5);\n",
" __Pyx_XDECREF(__pyx_t_6);\n",
" __Pyx_XDECREF(__pyx_t_7);\n",
" __Pyx_AddTraceback(\"_cython_magic_510139e97843e1ad4066ec2ca94da783.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __pyx_r = NULL;\n",
" __pyx_L0:;\n",
" __Pyx_XDECREF(__pyx_v_radius);\n",
" __Pyx_XDECREF(__pyx_v_x);\n",
" __Pyx_XDECREF(__pyx_v_a);\n",
" __Pyx_XDECREF(__pyx_v_b);\n",
" __Pyx_XDECREF(__pyx_v_theta);\n",
" __Pyx_XDECREF(__pyx_v_c);\n",
" __Pyx_XGIVEREF(__pyx_r);\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"/* … */\n",
" __pyx_tuple_ = PyTuple_Pack(10, __pyx_n_s_lon1, __pyx_n_s_lat1, __pyx_n_s_lon2, __pyx_n_s_lat2, __pyx_n_s_radius, __pyx_n_s_x, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_theta, __pyx_n_s_c); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 5, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_tuple_);\n",
" __Pyx_GIVEREF(__pyx_tuple_);\n",
"/* … */\n",
" __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_510139e97843e1ad4066ec2ca94da783_1great_circle, NULL, __pyx_n_s_cython_magic_510139e97843e1ad40); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 5, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_great_circle, __pyx_t_2) < 0) __PYX_ERR(0, 5, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
"
+06: radius = 6371 # 公里
\n",
"
__Pyx_INCREF(__pyx_int_6371);\n",
" __pyx_v_radius = __pyx_int_6371;\n",
"
+07: x = pi / 180
\n",
"
__Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_pi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_t_2 = __Pyx_PyInt_TrueDivideObjC(__pyx_t_1, __pyx_int_180, 0xB4, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 7, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_v_x = __pyx_t_2;\n",
" __pyx_t_2 = 0;\n",
"
08:
\n",
"
+09: a = (90 - lat1) * (x)
\n",
"
__pyx_t_2 = __Pyx_PyInt_SubtractCObj(__pyx_int_90, __pyx_v_lat1, 90, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __pyx_t_1 = PyNumber_Multiply(__pyx_t_2, __pyx_v_x); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
" __pyx_v_a = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
"
+10: b = (90 - lat2) * (x)
\n",
"
__pyx_t_1 = __Pyx_PyInt_SubtractCObj(__pyx_int_90, __pyx_v_lat2, 90, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_t_2 = PyNumber_Multiply(__pyx_t_1, __pyx_v_x); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_v_b = __pyx_t_2;\n",
" __pyx_t_2 = 0;\n",
"
+11: theta = (lon2 - lon1) * (x)
\n",
"
__pyx_t_2 = PyNumber_Subtract(__pyx_v_lon2, __pyx_v_lon1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __pyx_t_1 = PyNumber_Multiply(__pyx_t_2, __pyx_v_x); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
" __pyx_v_theta = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
"
+12: c = acos((cos(a) * cos(b)) + (sin(a) * sin(b) * cos(theta)))
\n",
"
__Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_acos); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_cos); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __pyx_t_5 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {\n",
" __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);\n",
" if (likely(__pyx_t_5)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);\n",
" __Pyx_INCREF(__pyx_t_5);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_4, function);\n",
" }\n",
" }\n",
" __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, __pyx_v_a) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_a);\n",
" __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_3);\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_cos); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __pyx_t_6 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) {\n",
" __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);\n",
" if (likely(__pyx_t_6)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);\n",
" __Pyx_INCREF(__pyx_t_6);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_5, function);\n",
" }\n",
" }\n",
" __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_v_b) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_b);\n",
" __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __pyx_t_5 = PyNumber_Multiply(__pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_sin); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_3);\n",
" __pyx_t_6 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {\n",
" __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_3);\n",
" if (likely(__pyx_t_6)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);\n",
" __Pyx_INCREF(__pyx_t_6);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_3, function);\n",
" }\n",
" }\n",
" __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_6, __pyx_v_a) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_v_a);\n",
" __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_sin); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_6);\n",
" __pyx_t_7 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) {\n",
" __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_6);\n",
" if (likely(__pyx_t_7)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6);\n",
" __Pyx_INCREF(__pyx_t_7);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_6, function);\n",
" }\n",
" }\n",
" __pyx_t_3 = (__pyx_t_7) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_7, __pyx_v_b) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_b);\n",
" __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_3);\n",
" __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" __pyx_t_6 = PyNumber_Multiply(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_6);\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_cos); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __pyx_t_7 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {\n",
" __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_4);\n",
" if (likely(__pyx_t_7)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);\n",
" __Pyx_INCREF(__pyx_t_7);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_4, function);\n",
" }\n",
" }\n",
" __pyx_t_3 = (__pyx_t_7) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_7, __pyx_v_theta) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_theta);\n",
" __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_3);\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __pyx_t_4 = PyNumber_Multiply(__pyx_t_6, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;\n",
" __pyx_t_3 = PyNumber_Add(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_3);\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __pyx_t_4 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {\n",
" __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);\n",
" if (likely(__pyx_t_4)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);\n",
" __Pyx_INCREF(__pyx_t_4);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_2, function);\n",
" }\n",
" }\n",
" __pyx_t_1 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_4, __pyx_t_3) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3);\n",
" __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;\n",
" if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 12, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
" __pyx_v_c = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
"
+13: return radius * c
\n",
"
__Pyx_XDECREF(__pyx_r);\n",
" __pyx_t_1 = PyNumber_Multiply(__pyx_v_radius, __pyx_v_c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_r = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
" goto __pyx_L0;\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%cython -a\n",
"\n",
"from math import pi, acos, cos, sin\n",
"\n",
"\n",
"def great_circle(lon1, lat1, lon2, lat2):\n",
" radius = 6371 # 公里\n",
" x = pi / 180\n",
"\n",
" a = (90 - lat1) * (x)\n",
" b = (90 - lat2) * (x)\n",
" theta = (lon2 - lon1) * (x)\n",
" c = acos((cos(a) * cos(b)) + (sin(a) * sin(b) * cos(theta)))\n",
" return radius * c"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Cython编译"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:12:46.091960Z",
"start_time": "2020-05-14T07:12:46.084755Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing great_circle_cy_v1.pyx\n"
]
}
],
"source": [
"%%file great_circle_cy_v1.pyx\n",
"from math import pi, acos, cos, sin\n",
"\n",
"def great_circle(double lon1, double lat1, double lon2, double lat2):\n",
"    \"\"\"Return the great-circle distance in kilometres between two points.\n",
"\n",
"    The points are given as longitude/latitude pairs in degrees. The\n",
"    distance is computed with the spherical law of cosines on a sphere of\n",
"    radius 6371 km. Arguments and locals are typed as C doubles; the\n",
"    trigonometric functions are still the Python-level ones from math.\n",
"\n",
"    Args:\n",
"        lon1, lat1: longitude and latitude of the first point, in degrees.\n",
"        lon2, lat2: longitude and latitude of the second point, in degrees.\n",
"\n",
"    Returns:\n",
"        The arc length between the two points, in kilometres.\n",
"    \"\"\"\n",
"    cdef double a, b, theta, c, x, radius, cos_c\n",
"\n",
"    radius = 6371  # kilometres\n",
"    x = pi/180  # degrees -> radians conversion factor\n",
"\n",
"    a = (90-lat1)*(x)  # colatitude of point 1, in radians\n",
"    b = (90-lat2)*(x)  # colatitude of point 2, in radians\n",
"    theta = (lon2-lon1)*(x)  # longitude difference, in radians\n",
"    cos_c = (cos(a)*cos(b)) + (sin(a)*sin(b)*cos(theta))\n",
"    # Floating-point rounding can leave cos_c marginally outside [-1, 1]\n",
"    # (e.g. when both points coincide), which makes acos raise ValueError.\n",
"    # Clamp it; NaN fails both comparisons and passes through unchanged.\n",
"    if cos_c > 1.0:\n",
"        cos_c = 1.0\n",
"    elif cos_c < -1.0:\n",
"        cos_c = -1.0\n",
"    c = acos(cos_c)\n",
"    return radius*c"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:13:42.512623Z",
"start_time": "2020-05-14T07:13:42.505435Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing great_circle_setup_v1.py\n"
]
}
],
"source": [
"%%file great_circle_setup_v1.py\n",
"# Build script for the great_circle_cy_v1 extension module.\n",
"# Usage: python great_circle_setup_v1.py build_ext --inplace\n",
"#\n",
"# NOTE(review): distutils is deprecated upstream (PEP 632); consider\n",
"# setuptools.setup when this is moved to a newer Python.\n",
"from distutils.core import setup\n",
"from Cython.Build import cythonize\n",
"\n",
"setup(\n",
"    name='Great Circle module v1',\n",
"    ext_modules=cythonize(\n",
"        \"great_circle_cy_v1.pyx\",\n",
"        # Pin the language level instead of relying on Cython's default,\n",
"        # which otherwise emits a FutureWarning and compiles as Py2.\n",
"        compiler_directives={'language_level': \"3\"},\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:13:46.874030Z",
"start_time": "2020-05-14T07:13:45.557078Z"
},
"scrolled": false,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compiling great_circle_cy_v1.pyx because it changed.\n",
"[1/1] Cythonizing great_circle_cy_v1.pyx\n",
"/home/junjiet/conda/lib/python3.7/site-packages/Cython/Compiler/Main.py:369: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /home/junjiet/data_science2020/2.数据处理/test_cython/great_circle_cy_v1.pyx\n",
" tree = Parsing.p_module(s, pxd, full_module_name)\n",
"running build_ext\n",
"building 'great_circle_cy_v1' extension\n",
"/home/junjiet/conda/bin/x86_64-conda_cos6-linux-gnu-cc -Wno-unused-result -Wsign-compare -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/junjiet/conda/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/junjiet/conda/include -fPIC -I/home/junjiet/conda/include/python3.7m -c great_circle_cy_v1.c -o build/temp.linux-x86_64-3.7/great_circle_cy_v1.o\n",
"x86_64-conda_cos6-linux-gnu-gcc -pthread -shared -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-rpath,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-rpath,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/home/junjiet/conda/lib -Wl,-rpath-link,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/junjiet/conda/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/junjiet/conda/include build/temp.linux-x86_64-3.7/great_circle_cy_v1.o -o /home/junjiet/data_science2020/2.数据处理/test_cython/great_circle_cy_v1.cpython-37m-x86_64-linux-gnu.so\n"
]
}
],
"source": [
"!python great_circle_setup_v1.py build_ext --inplace"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:13:48.268971Z",
"start_time": "2020-05-14T07:13:48.139436Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0m\u001b[01;34mbuild\u001b[0m/\r\n",
"great_circle_cy_v1.c\r\n",
"\u001b[01;32mgreat_circle_cy_v1.cpython-37m-x86_64-linux-gnu.so\u001b[0m*\r\n",
"great_circle_cy_v1.pyx\r\n",
"great_circle_py.py\r\n",
"great_circle_setup_v1.py\r\n",
"\u001b[01;34m__pycache__\u001b[0m/\r\n",
"setup.py\r\n",
"test.c\r\n",
"\u001b[01;32mtest.cpython-37m-x86_64-linux-gnu.so\u001b[0m*\r\n",
"test.pyx\r\n"
]
}
],
"source": [
"# List the working directory to check the build outputs (.c and .so files).\n",
"# The explicit %ls magic is used because bare automagic only works in\n",
"# single-line cells and breaks if a variable named ls exists.\n",
"%ls"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:14:25.334488Z",
"start_time": "2020-05-14T07:14:22.509546Z"
}
},
"outputs": [],
"source": [
"from great_circle_cy_v1 import great_circle\n",
"for i in range(num):\n",
" great_circle(lon1, lat1, lon2, lat2)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:15:03.011688Z",
"start_time": "2020-05-14T07:15:02.997463Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"\n",
" \n",
" Cython: _cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55.pyx\n",
" \n",
"\n",
"\n",
"Generated by Cython 0.29.15
\n",
"\n",
" Yellow lines hint at Python interaction.
\n",
" Click on a line that starts with a \"+
\" to see the C code that Cython generated for it.\n",
"
\n",
" 01:
\n",
"
+02: from math import pi, acos, cos, sin
\n",
"
__pyx_t_1 = PyList_New(4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_INCREF(__pyx_n_s_pi);\n",
" __Pyx_GIVEREF(__pyx_n_s_pi);\n",
" PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_pi);\n",
" __Pyx_INCREF(__pyx_n_s_acos);\n",
" __Pyx_GIVEREF(__pyx_n_s_acos);\n",
" PyList_SET_ITEM(__pyx_t_1, 1, __pyx_n_s_acos);\n",
" __Pyx_INCREF(__pyx_n_s_cos);\n",
" __Pyx_GIVEREF(__pyx_n_s_cos);\n",
" PyList_SET_ITEM(__pyx_t_1, 2, __pyx_n_s_cos);\n",
" __Pyx_INCREF(__pyx_n_s_sin);\n",
" __Pyx_GIVEREF(__pyx_n_s_sin);\n",
" PyList_SET_ITEM(__pyx_t_1, 3, __pyx_n_s_sin);\n",
" __pyx_t_2 = __Pyx_Import(__pyx_n_s_math, __pyx_t_1, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_pi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_pi, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_acos); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_acos, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_cos); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_cos, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_sin); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_sin, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
"
03:
\n",
"
+04: def great_circle(double lon1, double lat1, double lon2, double lat2):
\n",
"
/* Python wrapper */\n",
"static PyObject *__pyx_pw_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/\n",
"static PyMethodDef __pyx_mdef_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_1great_circle = {\"great_circle\", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_1great_circle, METH_VARARGS|METH_KEYWORDS, 0};\n",
"static PyObject *__pyx_pw_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {\n",
" double __pyx_v_lon1;\n",
" double __pyx_v_lat1;\n",
" double __pyx_v_lon2;\n",
" double __pyx_v_lat2;\n",
" PyObject *__pyx_r = 0;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle (wrapper)\", 0);\n",
" {\n",
" static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_lon1,&__pyx_n_s_lat1,&__pyx_n_s_lon2,&__pyx_n_s_lat2,0};\n",
" PyObject* values[4] = {0,0,0,0};\n",
" if (unlikely(__pyx_kwds)) {\n",
" Py_ssize_t kw_args;\n",
" const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);\n",
" switch (pos_args) {\n",
" case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" CYTHON_FALLTHROUGH;\n",
" case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" CYTHON_FALLTHROUGH;\n",
" case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" CYTHON_FALLTHROUGH;\n",
" case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" CYTHON_FALLTHROUGH;\n",
" case 0: break;\n",
" default: goto __pyx_L5_argtuple_error;\n",
" }\n",
" kw_args = PyDict_Size(__pyx_kwds);\n",
" switch (pos_args) {\n",
" case 0:\n",
" if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon1)) != 0)) kw_args--;\n",
" else goto __pyx_L5_argtuple_error;\n",
" CYTHON_FALLTHROUGH;\n",
" case 1:\n",
" if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat1)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 1); __PYX_ERR(0, 4, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 2:\n",
" if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 2); __PYX_ERR(0, 4, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 3:\n",
" if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 3); __PYX_ERR(0, 4, __pyx_L3_error)\n",
" }\n",
" }\n",
" if (unlikely(kw_args > 0)) {\n",
" if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, \"great_circle\") < 0)) __PYX_ERR(0, 4, __pyx_L3_error)\n",
" }\n",
" } else if (PyTuple_GET_SIZE(__pyx_args) != 4) {\n",
" goto __pyx_L5_argtuple_error;\n",
" } else {\n",
" values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" }\n",
" __pyx_v_lon1 = __pyx_PyFloat_AsDouble(values[0]); if (unlikely((__pyx_v_lon1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 4, __pyx_L3_error)\n",
" __pyx_v_lat1 = __pyx_PyFloat_AsDouble(values[1]); if (unlikely((__pyx_v_lat1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 4, __pyx_L3_error)\n",
" __pyx_v_lon2 = __pyx_PyFloat_AsDouble(values[2]); if (unlikely((__pyx_v_lon2 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 4, __pyx_L3_error)\n",
" __pyx_v_lat2 = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_lat2 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 4, __pyx_L3_error)\n",
" }\n",
" goto __pyx_L4_argument_unpacking_done;\n",
" __pyx_L5_argtuple_error:;\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 4, __pyx_L3_error)\n",
" __pyx_L3_error:;\n",
" __Pyx_AddTraceback(\"_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __Pyx_RefNannyFinishContext();\n",
" return NULL;\n",
" __pyx_L4_argument_unpacking_done:;\n",
" __pyx_r = __pyx_pf_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_great_circle(__pyx_self, __pyx_v_lon1, __pyx_v_lat1, __pyx_v_lon2, __pyx_v_lat2);\n",
"\n",
" /* function exit code */\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"static PyObject *__pyx_pf_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_great_circle(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_lon1, double __pyx_v_lat1, double __pyx_v_lon2, double __pyx_v_lat2) {\n",
" double __pyx_v_a;\n",
" double __pyx_v_b;\n",
" double __pyx_v_theta;\n",
" double __pyx_v_c;\n",
" double __pyx_v_x;\n",
" double __pyx_v_radius;\n",
" PyObject *__pyx_r = NULL;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle\", 0);\n",
"/* … */\n",
" /* function exit code */\n",
" __pyx_L1_error:;\n",
" __Pyx_XDECREF(__pyx_t_1);\n",
" __Pyx_XDECREF(__pyx_t_2);\n",
" __Pyx_XDECREF(__pyx_t_4);\n",
" __Pyx_XDECREF(__pyx_t_5);\n",
" __Pyx_XDECREF(__pyx_t_6);\n",
" __Pyx_XDECREF(__pyx_t_7);\n",
" __Pyx_XDECREF(__pyx_t_8);\n",
" __Pyx_XDECREF(__pyx_t_9);\n",
" __Pyx_AddTraceback(\"_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __pyx_r = NULL;\n",
" __pyx_L0:;\n",
" __Pyx_XGIVEREF(__pyx_r);\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"/* … */\n",
" __pyx_tuple_ = PyTuple_Pack(10, __pyx_n_s_lon1, __pyx_n_s_lat1, __pyx_n_s_lon2, __pyx_n_s_lat2, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_theta, __pyx_n_s_c, __pyx_n_s_x, __pyx_n_s_radius); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 4, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_tuple_);\n",
" __Pyx_GIVEREF(__pyx_tuple_);\n",
"/* … */\n",
" __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_a8c9eb2e14c0c5fef8bdedbf1ab48c55_1great_circle, NULL, __pyx_n_s_cython_magic_a8c9eb2e14c0c5fef8); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 4, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" if (PyDict_SetItem(__pyx_d, __pyx_n_s_great_circle, __pyx_t_2) < 0) __PYX_ERR(0, 4, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
"
05: cdef double a, b, theta, c, x, radius
\n",
"
06:
\n",
"
+07: radius = 6371 # 公里
\n",
"
__pyx_v_radius = 6371.0;\n",
"
+08: x = pi/180
\n",
"
__Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_pi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_t_2 = __Pyx_PyInt_TrueDivideObjC(__pyx_t_1, __pyx_int_180, 0xB4, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_3 = __pyx_PyFloat_AsDouble(__pyx_t_2); if (unlikely((__pyx_t_3 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 8, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
" __pyx_v_x = __pyx_t_3;\n",
"
09:
\n",
"
+10: a = (90-lat1)*(x)
\n",
"
__pyx_v_a = ((90.0 - __pyx_v_lat1) * __pyx_v_x);\n",
"
+11: b = (90-lat2)*(x)
\n",
"
__pyx_v_b = ((90.0 - __pyx_v_lat2) * __pyx_v_x);\n",
"
+12: theta = (lon2-lon1)*(x)
\n",
"
__pyx_v_theta = ((__pyx_v_lon2 - __pyx_v_lon1) * __pyx_v_x);\n",
"
+13: c = acos((cos(a)*cos(b)) + (sin(a)*sin(b)*cos(theta)))
\n",
"
__Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_acos); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_cos); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __pyx_t_6 = PyFloat_FromDouble(__pyx_v_a); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_6);\n",
" __pyx_t_7 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) {\n",
" __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_5);\n",
" if (likely(__pyx_t_7)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);\n",
" __Pyx_INCREF(__pyx_t_7);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_5, function);\n",
" }\n",
" }\n",
" __pyx_t_4 = (__pyx_t_7) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_7, __pyx_t_6) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6);\n",
" __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_cos); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_6);\n",
" __pyx_t_7 = PyFloat_FromDouble(__pyx_v_b); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_7);\n",
" __pyx_t_8 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) {\n",
" __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_6);\n",
" if (likely(__pyx_t_8)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6);\n",
" __Pyx_INCREF(__pyx_t_8);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_6, function);\n",
" }\n",
" }\n",
" __pyx_t_5 = (__pyx_t_8) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_8, __pyx_t_7) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_7);\n",
" __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;\n",
" __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" __pyx_t_6 = PyNumber_Multiply(__pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_6);\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_sin); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __pyx_t_7 = PyFloat_FromDouble(__pyx_v_a); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_7);\n",
" __pyx_t_8 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {\n",
" __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_4);\n",
" if (likely(__pyx_t_8)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);\n",
" __Pyx_INCREF(__pyx_t_8);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_4, function);\n",
" }\n",
" }\n",
" __pyx_t_5 = (__pyx_t_8) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_8, __pyx_t_7) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_7);\n",
" __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;\n",
" __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_7, __pyx_n_s_sin); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_7);\n",
" __pyx_t_8 = PyFloat_FromDouble(__pyx_v_b); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_8);\n",
" __pyx_t_9 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_7))) {\n",
" __pyx_t_9 = PyMethod_GET_SELF(__pyx_t_7);\n",
" if (likely(__pyx_t_9)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);\n",
" __Pyx_INCREF(__pyx_t_9);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_7, function);\n",
" }\n",
" }\n",
" __pyx_t_4 = (__pyx_t_9) ? __Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_9, __pyx_t_8) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_8);\n",
" __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;\n",
" __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;\n",
" if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" __pyx_t_7 = PyNumber_Multiply(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_7);\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_cos); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __pyx_t_8 = PyFloat_FromDouble(__pyx_v_theta); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_8);\n",
" __pyx_t_9 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) {\n",
" __pyx_t_9 = PyMethod_GET_SELF(__pyx_t_5);\n",
" if (likely(__pyx_t_9)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);\n",
" __Pyx_INCREF(__pyx_t_9);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_5, function);\n",
" }\n",
" }\n",
" __pyx_t_4 = (__pyx_t_9) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_9, __pyx_t_8) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_8);\n",
" __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;\n",
" __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;\n",
" if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __pyx_t_5 = PyNumber_Multiply(__pyx_t_7, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_5);\n",
" __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" __pyx_t_4 = PyNumber_Add(__pyx_t_6, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_4);\n",
" __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;\n",
" __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __pyx_t_5 = NULL;\n",
" if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) {\n",
" __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1);\n",
" if (likely(__pyx_t_5)) {\n",
" PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);\n",
" __Pyx_INCREF(__pyx_t_5);\n",
" __Pyx_INCREF(function);\n",
" __Pyx_DECREF_SET(__pyx_t_1, function);\n",
" }\n",
" }\n",
" __pyx_t_2 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_5, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_4);\n",
" __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;\n",
" __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;\n",
" if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;\n",
" __pyx_t_3 = __pyx_PyFloat_AsDouble(__pyx_t_2); if (unlikely((__pyx_t_3 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 13, __pyx_L1_error)\n",
" __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;\n",
" __pyx_v_c = __pyx_t_3;\n",
"
+14: return radius*c
\n",
"
__Pyx_XDECREF(__pyx_r);\n",
" __pyx_t_2 = PyFloat_FromDouble((__pyx_v_radius * __pyx_v_c)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_2);\n",
" __pyx_r = __pyx_t_2;\n",
" __pyx_t_2 = 0;\n",
" goto __pyx_L0;\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%cython -a\n",
"\n",
"from math import pi, acos, cos, sin\n",
"\n",
"def great_circle(double lon1, double lat1, double lon2, double lat2):\n",
" cdef double a, b, theta, c, x, radius\n",
" \n",
" radius = 6371 # 公里\n",
" x = pi/180\n",
"\n",
" a = (90-lat1)*(x)\n",
" b = (90-lat2)*(x)\n",
" theta = (lon2-lon1)*(x)\n",
" c = acos((cos(a)*cos(b)) + (sin(a)*sin(b)*cos(theta)))\n",
" return radius*c"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### C标准库函数"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:15:30.208339Z",
"start_time": "2020-05-14T07:15:30.200784Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing great_circle_cy_v2.pyx\n"
]
}
],
"source": [
"%%file great_circle_cy_v2.pyx\n",
"cdef extern from \"math.h\":\n",
" float cosf(float theta)\n",
" float sinf(float theta)\n",
" float acosf(float theta)\n",
"\n",
"cpdef double great_circle(double lon1, double lat1, double lon2, double lat2):\n",
" cdef double a, b, theta, c, x, radius\n",
" cdef double pi = 3.141592653589793\n",
"\n",
" radius = 6371 # 公里\n",
" x = pi/180\n",
"\n",
" a = (90-lat1)*(x)\n",
" b = (90-lat2)*(x)\n",
" theta = (lon2-lon1)*(x)\n",
" c = acosf((cosf(a)*cosf(b)) + (sinf(a)*sinf(b)*cosf(theta)))\n",
" return radius*c"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:15:36.151667Z",
"start_time": "2020-05-14T07:15:36.144605Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing great_circle_setup_v2.py\n"
]
}
],
"source": [
"%%file great_circle_setup_v2.py\n",
"from distutils.core import setup\n",
"from Cython.Build import cythonize\n",
"\n",
"setup(\n",
" name=\"Great Circle module v2\", ext_modules=cythonize(\"great_circle_cy_v2.pyx\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:15:42.128861Z",
"start_time": "2020-05-14T07:15:41.145366Z"
},
"scrolled": false,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compiling great_circle_cy_v2.pyx because it changed.\n",
"[1/1] Cythonizing great_circle_cy_v2.pyx\n",
"/home/junjiet/conda/lib/python3.7/site-packages/Cython/Compiler/Main.py:369: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /home/junjiet/data_science2020/2.数据处理/test_cython/great_circle_cy_v2.pyx\n",
" tree = Parsing.p_module(s, pxd, full_module_name)\n",
"running build_ext\n",
"building 'great_circle_cy_v2' extension\n",
"/home/junjiet/conda/bin/x86_64-conda_cos6-linux-gnu-cc -Wno-unused-result -Wsign-compare -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/junjiet/conda/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/junjiet/conda/include -fPIC -I/home/junjiet/conda/include/python3.7m -c great_circle_cy_v2.c -o build/temp.linux-x86_64-3.7/great_circle_cy_v2.o\n",
"x86_64-conda_cos6-linux-gnu-gcc -pthread -shared -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-rpath,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-rpath,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/home/junjiet/conda/lib -Wl,-rpath-link,/home/junjiet/conda/lib -L/home/junjiet/conda/lib -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/junjiet/conda/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/junjiet/conda/include build/temp.linux-x86_64-3.7/great_circle_cy_v2.o -o /home/junjiet/data_science2020/2.数据处理/test_cython/great_circle_cy_v2.cpython-37m-x86_64-linux-gnu.so\n"
]
}
],
"source": [
"!python great_circle_setup_v2.py build_ext --inplace"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:15:44.882132Z",
"start_time": "2020-05-14T07:15:43.383184Z"
}
},
"outputs": [],
"source": [
"from great_circle_cy_v2 import great_circle\n",
"for i in range(num):\n",
" great_circle(lon1, lat1, lon2, lat2)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:16:03.253476Z",
"start_time": "2020-05-14T07:16:03.240373Z"
},
"scrolled": true,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"\n",
" \n",
" Cython: _cython_magic_001d315da99fd0491a49a895f572c5f0.pyx\n",
" \n",
"\n",
"\n",
"Generated by Cython 0.29.15
\n",
"\n",
" Yellow lines hint at Python interaction.
\n",
" Click on a line that starts with a \"+
\" to see the C code that Cython generated for it.\n",
"
\n",
" 01:
\n",
"
02: cdef extern from "math.h":
\n",
"
03: float cosf(float theta)
\n",
"
04: float sinf(float theta)
\n",
"
05: float acosf(float theta)
\n",
"
06:
\n",
"
+07: cpdef double great_circle(double lon1, double lat1, double lon2, double lat2):
\n",
"
static PyObject *__pyx_pw_46_cython_magic_001d315da99fd0491a49a895f572c5f0_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/\n",
"static double __pyx_f_46_cython_magic_001d315da99fd0491a49a895f572c5f0_great_circle(double __pyx_v_lon1, double __pyx_v_lat1, double __pyx_v_lon2, double __pyx_v_lat2, CYTHON_UNUSED int __pyx_skip_dispatch) {\n",
" double __pyx_v_a;\n",
" double __pyx_v_b;\n",
" double __pyx_v_theta;\n",
" double __pyx_v_c;\n",
" double __pyx_v_x;\n",
" double __pyx_v_radius;\n",
" double __pyx_v_pi;\n",
" double __pyx_r;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle\", 0);\n",
"/* … */\n",
" /* function exit code */\n",
" __pyx_L0:;\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"/* Python wrapper */\n",
"static PyObject *__pyx_pw_46_cython_magic_001d315da99fd0491a49a895f572c5f0_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/\n",
"static PyObject *__pyx_pw_46_cython_magic_001d315da99fd0491a49a895f572c5f0_1great_circle(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {\n",
" double __pyx_v_lon1;\n",
" double __pyx_v_lat1;\n",
" double __pyx_v_lon2;\n",
" double __pyx_v_lat2;\n",
" PyObject *__pyx_r = 0;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle (wrapper)\", 0);\n",
" {\n",
" static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_lon1,&__pyx_n_s_lat1,&__pyx_n_s_lon2,&__pyx_n_s_lat2,0};\n",
" PyObject* values[4] = {0,0,0,0};\n",
" if (unlikely(__pyx_kwds)) {\n",
" Py_ssize_t kw_args;\n",
" const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);\n",
" switch (pos_args) {\n",
" case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" CYTHON_FALLTHROUGH;\n",
" case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" CYTHON_FALLTHROUGH;\n",
" case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" CYTHON_FALLTHROUGH;\n",
" case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" CYTHON_FALLTHROUGH;\n",
" case 0: break;\n",
" default: goto __pyx_L5_argtuple_error;\n",
" }\n",
" kw_args = PyDict_Size(__pyx_kwds);\n",
" switch (pos_args) {\n",
" case 0:\n",
" if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon1)) != 0)) kw_args--;\n",
" else goto __pyx_L5_argtuple_error;\n",
" CYTHON_FALLTHROUGH;\n",
" case 1:\n",
" if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat1)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 1); __PYX_ERR(0, 7, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 2:\n",
" if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lon2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 2); __PYX_ERR(0, 7, __pyx_L3_error)\n",
" }\n",
" CYTHON_FALLTHROUGH;\n",
" case 3:\n",
" if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lat2)) != 0)) kw_args--;\n",
" else {\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, 3); __PYX_ERR(0, 7, __pyx_L3_error)\n",
" }\n",
" }\n",
" if (unlikely(kw_args > 0)) {\n",
" if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, \"great_circle\") < 0)) __PYX_ERR(0, 7, __pyx_L3_error)\n",
" }\n",
" } else if (PyTuple_GET_SIZE(__pyx_args) != 4) {\n",
" goto __pyx_L5_argtuple_error;\n",
" } else {\n",
" values[0] = PyTuple_GET_ITEM(__pyx_args, 0);\n",
" values[1] = PyTuple_GET_ITEM(__pyx_args, 1);\n",
" values[2] = PyTuple_GET_ITEM(__pyx_args, 2);\n",
" values[3] = PyTuple_GET_ITEM(__pyx_args, 3);\n",
" }\n",
" __pyx_v_lon1 = __pyx_PyFloat_AsDouble(values[0]); if (unlikely((__pyx_v_lon1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 7, __pyx_L3_error)\n",
" __pyx_v_lat1 = __pyx_PyFloat_AsDouble(values[1]); if (unlikely((__pyx_v_lat1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 7, __pyx_L3_error)\n",
" __pyx_v_lon2 = __pyx_PyFloat_AsDouble(values[2]); if (unlikely((__pyx_v_lon2 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 7, __pyx_L3_error)\n",
" __pyx_v_lat2 = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_lat2 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 7, __pyx_L3_error)\n",
" }\n",
" goto __pyx_L4_argument_unpacking_done;\n",
" __pyx_L5_argtuple_error:;\n",
" __Pyx_RaiseArgtupleInvalid(\"great_circle\", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 7, __pyx_L3_error)\n",
" __pyx_L3_error:;\n",
" __Pyx_AddTraceback(\"_cython_magic_001d315da99fd0491a49a895f572c5f0.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __Pyx_RefNannyFinishContext();\n",
" return NULL;\n",
" __pyx_L4_argument_unpacking_done:;\n",
" __pyx_r = __pyx_pf_46_cython_magic_001d315da99fd0491a49a895f572c5f0_great_circle(__pyx_self, __pyx_v_lon1, __pyx_v_lat1, __pyx_v_lon2, __pyx_v_lat2);\n",
"\n",
" /* function exit code */\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"static PyObject *__pyx_pf_46_cython_magic_001d315da99fd0491a49a895f572c5f0_great_circle(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_lon1, double __pyx_v_lat1, double __pyx_v_lon2, double __pyx_v_lat2) {\n",
" PyObject *__pyx_r = NULL;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"great_circle\", 0);\n",
" __Pyx_XDECREF(__pyx_r);\n",
" __pyx_t_1 = PyFloat_FromDouble(__pyx_f_46_cython_magic_001d315da99fd0491a49a895f572c5f0_great_circle(__pyx_v_lon1, __pyx_v_lat1, __pyx_v_lon2, __pyx_v_lat2, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_r = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
" goto __pyx_L0;\n",
"\n",
" /* function exit code */\n",
" __pyx_L1_error:;\n",
" __Pyx_XDECREF(__pyx_t_1);\n",
" __Pyx_AddTraceback(\"_cython_magic_001d315da99fd0491a49a895f572c5f0.great_circle\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __pyx_r = NULL;\n",
" __pyx_L0:;\n",
" __Pyx_XGIVEREF(__pyx_r);\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"
08: cdef double a, b, theta, c, x, radius
\n",
"
+09: cdef double pi = 3.141592653589793
\n",
"
__pyx_v_pi = 3.141592653589793;\n",
"
10:
\n",
"
+11: radius = 6371 # 公里
\n",
"
__pyx_v_radius = 6371.0;\n",
"
+12: x = pi/180
\n",
"
__pyx_v_x = (__pyx_v_pi / 180.0);\n",
"
13:
\n",
"
+14: a = (90-lat1)*(x)
\n",
"
__pyx_v_a = ((90.0 - __pyx_v_lat1) * __pyx_v_x);\n",
"
+15: b = (90-lat2)*(x)
\n",
"
__pyx_v_b = ((90.0 - __pyx_v_lat2) * __pyx_v_x);\n",
"
+16: theta = (lon2-lon1)*(x)
\n",
"
__pyx_v_theta = ((__pyx_v_lon2 - __pyx_v_lon1) * __pyx_v_x);\n",
"
+17: c = acosf((cosf(a)*cosf(b)) + (sinf(a)*sinf(b)*cosf(theta)))
\n",
"
__pyx_v_c = acosf(((cosf(__pyx_v_a) * cosf(__pyx_v_b)) + ((sinf(__pyx_v_a) * sinf(__pyx_v_b)) * cosf(__pyx_v_theta))));\n",
"
+18: return radius*c
\n",
"
__pyx_r = (__pyx_v_radius * __pyx_v_c);\n",
" goto __pyx_L0;\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%cython -a\n",
"\n",
"cdef extern from \"math.h\":\n",
" float cosf(float theta)\n",
" float sinf(float theta)\n",
" float acosf(float theta)\n",
"\n",
"cpdef double great_circle(double lon1, double lat1, double lon2, double lat2):\n",
" cdef double a, b, theta, c, x, radius\n",
" cdef double pi = 3.141592653589793\n",
"\n",
" radius = 6371 # 公里\n",
" x = pi/180\n",
"\n",
" a = (90-lat1)*(x)\n",
" b = (90-lat2)*(x)\n",
" theta = (lon2-lon1)*(x)\n",
" c = acosf((cosf(a)*cosf(b)) + (sinf(a)*sinf(b)*cosf(theta)))\n",
" return radius*c"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## [Numba](http://numba.pydata.org)\n",
"\n",
"通过装饰器控制Python解释器把函数转变成机器码,实现了与C和Cython同样的性能,但是不需要用新的解释器或者写C代码。可以按需生成优化(JIT)的机器码,甚至可以编译成CPU或GPU可执行代码。\n",
"\n",
"- JIT即时代码生成(On-the-fly code generation)\n",
"- CPU和GPU原生代码生成\n",
"- 与Numpy相关包交互"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### `@jit`装饰器"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:17:43.775964Z",
"start_time": "2020-05-14T07:17:43.725961Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"a = np.random.rand(1000, 1000)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:17:52.855889Z",
"start_time": "2020-05-14T07:17:44.564557Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"275 ms ± 13.1 ms per loop (mean ± std. dev. of 3 runs, 10 loops each)\n"
]
}
],
"source": [
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0\n",
" for i in range(M):\n",
" for j in range(N):\n",
" result += arr[i, j]\n",
" return result\n",
"\n",
"%timeit -r3 -n10 sum2d(a)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### 延迟编译(Lazy compilation)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:18:06.926856Z",
"start_time": "2020-05-14T07:18:04.858154Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.28 ms ± 48.7 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"from numba import jit\n",
"\n",
"# jit装饰器告诉Numba编译函数,当函数被调用时,Numba再引入参数类型\n",
"@jit\n",
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0\n",
" for i in range(M):\n",
" for j in range(N):\n",
" result += arr[i, j]\n",
" return result\n",
"\n",
"%timeit sum2d(a)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### 即时编译(Eager compilation)\n",
"\n",
"由于python支持动态类型,因此`@jit`装饰器可以设置函数的接收类型(返回类型),按照配置参数进行优化,适合进行浮点数精度控制float32、float64。"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:18:27.588685Z",
"start_time": "2020-05-14T07:18:19.544085Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"949 µs ± 1.69 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"from numba import jit, float64\n",
"\n",
"\n",
"@jit(float64(float64[:, :]))\n",
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0\n",
" for i in range(M):\n",
" for j in range(N):\n",
" result += arr[i, j]\n",
" return result\n",
"\n",
"\n",
"%timeit sum2d(a)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"@jit配置函数签名的常用类型如下。\n",
"\n",
"- `void`:函数返回值类型,表示不返回任何结果。\n",
"- `intp`和`uintp`:指针大小的整数,分别表示签名和无签名类型。\n",
"- `intc`和`uintc`:相当于C语言的整型和无符号整型。\n",
"- `int8`、`int16`、`int32`和`int64`:固定宽度整型(无符号整型前面加`u`,比如`uint8`)。\n",
"- `float32`和`float64`:单精度和双精度浮点数类型。\n",
"- `complex64`和`complex128`:单精度和双精度复数类型。\n",
"- 数组可以用任何带索引的数值类型表示,比如`float32[:]`就是一维浮点数数组类型,`int32[:,:]`就是二维整型数组。"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### 编译选项\n",
"\n",
"1. 非GIL模式:把`nogil=True`属性传到装饰器,就可以不受GIL的限制,多线程系统的常见问题(一致性、数据同步、竞态条件等)就可以解决。\n",
"1. [无Python模式](https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#numba-troubleshooting):可以通过`nopython`参数设置Numba的编译模式:\n",
" 1. `object`模式:默认模式,产生的代码可以处理所有Python对象,并用C API完成Python对象上的操作;\n",
" 1. `nopython`模式:可以不调用C API而生成更高效的代码,不过只有一部分函数和方法可以使用:\n",
" - 函数中表示数值的所有原生类型都可以被引用\n",
" - 函数中不可以分配新内存\n",
"1. 缓存模式:避免重复调用,通过`cache=True`将结果保证在缓存文件中\n",
"1. [并行模式](https://numba.pydata.org/numba-doc/latest/user/parallel.html#numba-parallel):通过`parallel=True`并行计算,必须配合`nopython=True`使用"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T07:19:11.138043Z",
"start_time": "2020-05-14T07:19:02.779467Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"962 µs ± 29.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"@jit(nopython=True)\n",
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0\n",
" for i in range(M):\n",
" for j in range(N):\n",
" result += arr[i, j]\n",
" return result\n",
"\n",
"\n",
"%timeit sum2d(a)"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-13T12:16:43.068215Z",
"start_time": "2020-05-13T12:16:43.027926Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"source": [
"from numba import prange\n",
"\n",
"@jit(parallel=True, nopython=True)\n",
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0\n",
" for i in prange(M):\n",
" for j in range(N):\n",
" result += arr[i, j]\n",
" return result\n",
"\n",
"\n",
"%timeit sum2d(a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. @njit:@jit(nopython=True)的\n",
"1. @vectorize与@guvectorize:支持NumPy的通用函数(ufunc)\n",
"1. @stencil:定义一个核函数实现stencil(模版)类操作\n",
"1. @jitclass:jit编译python类\n",
"1. @cfunc:定义可以被C/C++直接调用的函数\n",
"1. @overload:注册一个在nopython模式使用自定义函数"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# [pyspark](https://spark.apache.org/docs/latest/api/python/)\n",
"\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## 安装\n",
"\n",
"直接用connda安装即可,自动配置\n",
"\n",
"```bash\n",
"conda install pyspark -c conda-forge\n",
"pip install findspark\n",
"```\n",
"\n",
"## 初始化"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:30.735658Z",
"start_time": "2020-05-15T06:47:27.169577Z"
}
},
"outputs": [],
"source": [
"import findspark\n",
"\n",
"findspark.init(spark_home=\"/home/junjiet/conda/lib/python3.7/site-packages/pyspark\")\n",
"\n",
"from pyspark.sql import SparkSession, dataframe\n",
"from pyspark import SparkConf, SparkContext\n",
"from pyspark.sql.types import *\n",
"from pyspark.sql import functions as F\n",
"\n",
"sparkConf = SparkConf().set(\"spark.sql.execution.arrow.enabled\", \"false\")\n",
"spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()\n",
"sc = SparkContext.getOrCreate()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## [RDD简介](https://spark.apache.org/docs/latest/api/python/pyspark.html#pyspark.RDD)\n",
"\n",
"RDD(Resilient Distributed DataSet,弹性分布式数据集),是Spark中最基本的数据抽象是,具有分区,不可变,并行操作特点\n",
"\n",
""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:32.045112Z",
"start_time": "2020-05-15T06:47:31.757048Z"
}
},
"outputs": [],
"source": [
"rdd = sc.parallelize([1, 2, 2, 3, 3, 4, 5])"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T08:51:17.822011Z",
"start_time": "2020-05-14T08:51:17.148564Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### RDD常用转换(Transformation)API\n",
""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:33.453966Z",
"start_time": "2020-05-15T06:47:32.300347Z"
},
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"[2, 2, 4]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdd.filter(lambda x: x % 2 == 0).collect()"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T08:32:46.666843Z",
"start_time": "2020-05-14T08:32:46.659191Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### RDD常用动作(Action)API\n",
""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:33.609323Z",
"start_time": "2020-05-15T06:47:33.455841Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdd.count()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:34.247797Z",
"start_time": "2020-05-15T06:47:33.627380Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[1, 2, 3, 4, 5]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdd.distinct().collect()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## RDD与DataFrame基本操作"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"![](2.data-elt/rdd.png)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"![](2.data-elt/pysparkdf.png)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## Dataframe"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:35.241922Z",
"start_time": "2020-05-15T06:47:35.235227Z"
}
},
"outputs": [],
"source": [
"schema = (\n",
" StructType()\n",
" .add(\"user_id\", \"string\")\n",
" .add(\"country\", \"string\")\n",
" .add(\"browser\", \"string\")\n",
" .add(\"OS\", \"string\")\n",
" .add(\"age\", \"integer\")\n",
" .add(\"salary\", \"double\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:37.735454Z",
"start_time": "2020-05-15T06:47:36.017342Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- user_id: string (nullable = true)\n",
" |-- country: string (nullable = true)\n",
" |-- browser: string (nullable = true)\n",
" |-- OS: string (nullable = true)\n",
" |-- age: integer (nullable = true)\n",
" |-- salary: double (nullable = true)\n",
"\n"
]
}
],
"source": [
"df = spark.createDataFrame(\n",
" [\n",
" (\"A203\", \"India\", \"Chrome\", \"WIN\", 33, 12.34),\n",
" (\"A201\", \"China\", \"Safari\", \"MacOS\", 45, 14.56),\n",
" (\"A205\", \"UK\", \"Mozilla\", \"Linux\", 25, 16.78),\n",
" (\"A206\", \"China\", \"Chrome\", \"MacOS\", 68, 23.45),\n",
" ],\n",
" schema=schema,\n",
")\n",
"\n",
"df.printSchema()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:38.828103Z",
"start_time": "2020-05-15T06:47:37.738535Z"
},
"scrolled": false,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-------+-------+-----+---+------+\n",
"|user_id|country|browser| OS|age|salary|\n",
"+-------+-------+-------+-----+---+------+\n",
"| A203| India| Chrome| WIN| 33| 12.34|\n",
"| A201| China| Safari|MacOS| 45| 14.56|\n",
"| A205| UK|Mozilla|Linux| 25| 16.78|\n",
"| A206| China| Chrome|MacOS| 68| 23.45|\n",
"+-------+-------+-------+-----+---+------+\n",
"\n"
]
}
],
"source": [
"df.show()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:39.105824Z",
"start_time": "2020-05-15T06:47:38.831614Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"DataFrame[user_id: string, country: string, browser: string, OS: string, age: int, salary: double]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.filter(df[\"age\"] > 30)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:39.517904Z",
"start_time": "2020-05-15T06:47:39.107273Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.filter(df[\"age\"] > 30).count()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:47:40.279015Z",
"start_time": "2020-05-15T06:47:39.928238Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-------+-------+-----+---+------+\n",
"|user_id|country|browser| OS|age|salary|\n",
"+-------+-------+-------+-----+---+------+\n",
"| A201| China| Safari|MacOS| 45| 14.56|\n",
"| A206| China| Chrome|MacOS| 68| 23.45|\n",
"+-------+-------+-------+-----+---+------+\n",
"\n"
]
}
],
"source": [
"df.where((df[\"age\"] > 30) & (df[\"country\"] == \"China\")).show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:58:48.483420Z",
"start_time": "2020-05-15T06:58:48.233972Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" country | \n",
" browser | \n",
" OS | \n",
" age | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A203 | \n",
" India | \n",
" Chrome | \n",
" WIN | \n",
" 33 | \n",
" 12.34 | \n",
"
\n",
" \n",
" 1 | \n",
" A201 | \n",
" China | \n",
" Safari | \n",
" MacOS | \n",
" 45 | \n",
" 14.56 | \n",
"
\n",
" \n",
" 2 | \n",
" A205 | \n",
" UK | \n",
" Mozilla | \n",
" Linux | \n",
" 25 | \n",
" 16.78 | \n",
"
\n",
" \n",
" 3 | \n",
" A206 | \n",
" China | \n",
" Chrome | \n",
" MacOS | \n",
" 68 | \n",
" 23.45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id country browser OS age salary\n",
"0 A203 India Chrome WIN 33 12.34\n",
"1 A201 China Safari MacOS 45 14.56\n",
"2 A205 UK Mozilla Linux 25 16.78\n",
"3 A206 China Chrome MacOS 68 23.45"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.toPandas()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 自定义函数"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DataFrame属性"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:58:50.748839Z",
"start_time": "2020-05-15T06:58:50.740716Z"
}
},
"outputs": [],
"source": [
"from pyspark.sql import dataframe\n",
"\n",
"\n",
"def spark_shape(self):\n",
"    \"\"\"Return ``(row_count, column_count)`` for this Spark DataFrame.\n",
"\n",
"    The row count comes from ``self.count()``, which is a Spark action, so it\n",
"    is recomputed on every call.\n",
"    \"\"\"\n",
"    row_count = self.count()\n",
"    column_count = len(self.columns)\n",
"    return (row_count, column_count)\n",
"\n",
"\n",
"# Attached as a plain method (not a property), so it is invoked as ``df.shape()``.\n",
"dataframe.DataFrame.shape = spark_shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:58:51.856018Z",
"start_time": "2020-05-15T06:58:51.641149Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(4, 6)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### UDF"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:58:53.737059Z",
"start_time": "2020-05-15T06:58:53.161264Z"
},
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-------+-------+-----+---+------+------------+\n",
"|user_id|country|browser| OS|age|salary|age_category|\n",
"+-------+-------+-------+-----+---+------+------------+\n",
"| A203| India| Chrome| WIN| 33| 12.34| B|\n",
"| A201| China| Safari|MacOS| 45| 14.56| B|\n",
"| A205| UK|Mozilla|Linux| 25| 16.78| A|\n",
"| A206| China| Chrome|MacOS| 68| 23.45| C|\n",
"+-------+-------+-------+-----+---+------+------------+\n",
"\n"
]
}
],
"source": [
"from pyspark.sql.functions import udf\n",
"\n",
"\n",
"def age_category(age):\n",
"    \"\"\"Bucket an age into a single-letter category.\n",
"\n",
"    Returns \"A\" for 18 <= age < 30, \"B\" for any other age below 60,\n",
"    \"C\" for 60 and above, and None when ``age`` is None.\n",
"    \"\"\"\n",
"    if age is None:\n",
"        # The age column is nullable; without this guard a null value would\n",
"        # raise TypeError on the comparisons below.\n",
"        return None\n",
"    if 18 <= age < 30:\n",
"        return \"A\"\n",
"    # NOTE(review): ages under 18 also land in \"B\" - confirm this is intended.\n",
"    if age < 60:\n",
"        return \"B\"\n",
"    return \"C\"\n",
"\n",
"\n",
"age_udf = udf(age_category, StringType())\n",
"\n",
"df.withColumn(\"age_category\", age_udf(df[\"age\"])).show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Pandas UDF"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T06:58:55.635726Z",
"start_time": "2020-05-15T06:58:55.362825Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(12.34, 23.45)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Aggregate once and unpack the single result row into the salary bounds.\n",
"salary_stats = df.agg(F.min(\"salary\"), F.max(\"salary\")).collect()\n",
"min_sal, max_sal = salary_stats[0]\n",
"min_sal, max_sal"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-15T07:15:09.975720Z",
"start_time": "2020-05-15T07:15:09.969746Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"source": [
"from pyspark.sql.functions import pandas_udf\n",
"\n",
"\n",
"def scaled_salary(salary):\n",
"    \"\"\"Min-max scale ``salary`` with the notebook-level ``min_sal`` / ``max_sal``.\n",
"\n",
"    NOTE(review): presumably receives a pandas Series per batch when run\n",
"    through ``pandas_udf`` - confirm against the installed PySpark version.\n",
"    \"\"\"\n",
"    salary_range = max_sal - min_sal\n",
"    return (salary - min_sal) / salary_range\n",
"\n",
"\n",
"scaling_udf = pandas_udf(scaled_salary, DoubleType())"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2020-05-14T09:33:01.423540Z",
"start_time": "2020-05-14T09:33:01.419429Z"
},
"scrolled": false,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"source": [
"df.select(df[\"salary\"], scaling_udf(df[\"salary\"])).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![](http://dblab.xmu.edu.cn/wp-content/themes/labstyle/images/branding.png)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"1. [捕蛇者说](https://pythonhunter.org/) 中文 Python 播客,有趣有料\n",
"1. [pandas_profiling](https://github.com/pandas-profiling/pandas-profiling) EDA可视化报表,支持导出html格式\n",
"2. [pandarallel](https://github.com/nalepae/pandarallel) CPU并行加速,apply、map、groupby与rolling等应用场景\n",
"3. [jax](https://github.com/google/jax) NumPy的GPU加速——谷歌开源,jakevdp参与开发\n",
"4. [cudf](https://github.com/rapidsai/cudf) DataFrame的GPU加速\n",
"5. [koalas](https://koalas.readthedocs.io/en/latest/index.html) Databricks按照pandas实现的pyspark接口"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}