{
"cells": [
{
"cell_type": "markdown",
"id": "e10fb2c2",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"# 锚框\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e079962e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T07:00:42.677769Z",
"iopub.status.busy": "2023-08-18T07:00:42.676695Z",
"iopub.status.idle": "2023-08-18T07:00:45.106116Z",
"shell.execute_reply": "2023-08-18T07:00:45.104773Z"
},
"origin_pos": 2,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import torch\n",
"from d2l import torch as d2l\n",
"\n",
"torch.set_printoptions(2)"
]
},
{
"cell_type": "markdown",
"id": "b96c5129",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"锚框的宽度和高度分别是$hs\\sqrt{r}$和$hs/\\sqrt{r}$。\n",
"我们只考虑\n",
"组合:\n",
"\n",
"$$(s_1, r_1), (s_1, r_2), \\ldots, (s_1, r_m), (s_2, r_1), (s_3, r_1), \\ldots, (s_n, r_1)$$"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4c5fb635",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T07:00:45.112186Z",
"iopub.status.busy": "2023-08-18T07:00:45.111657Z",
"iopub.status.idle": "2023-08-18T07:00:45.126939Z",
"shell.execute_reply": "2023-08-18T07:00:45.125859Z"
},
"origin_pos": 6,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"def multibox_prior(data, sizes, ratios):\n",
" \"\"\"生成以每个像素为中心具有不同形状的锚框\"\"\"\n",
" in_height, in_width = data.shape[-2:]\n",
" device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)\n",
" boxes_per_pixel = (num_sizes + num_ratios - 1)\n",
" size_tensor = torch.tensor(sizes, device=device)\n",
" ratio_tensor = torch.tensor(ratios, device=device)\n",
"\n",
" offset_h, offset_w = 0.5, 0.5\n",
" steps_h = 1.0 / in_height\n",
" steps_w = 1.0 / in_width\n",
"\n",
" center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h\n",
" center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w\n",
" shift_y, shift_x = torch.meshgrid(center_h, center_w, indexing='ij')\n",
" shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)\n",
"\n",
" w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),\n",
" sizes[0] * torch.sqrt(ratio_tensor[1:])))\\\n",
" * in_height / in_width\n",
" h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),\n",
" sizes[0] / torch.sqrt(ratio_tensor[1:])))\n",
" anchor_manipulations = torch.stack((-w, -h, w, h)).T.repeat(\n",
" in_height * in_width, 1) / 2\n",
"\n",
" out_grid = torch.stack([shift_x, shift_y, shift_x, shift_y],\n",
" dim=1).repeat_interleave(boxes_per_pixel, dim=0)\n",
" output = out_grid + anchor_manipulations\n",
" return output.unsqueeze(0)"
]
},
{
"cell_type": "markdown",
"id": "27883d90",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"返回的锚框变量`Y`的形状"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f411d4af",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T07:00:45.131714Z",
"iopub.status.busy": "2023-08-18T07:00:45.131003Z",
"iopub.status.idle": "2023-08-18T07:00:45.238891Z",
"shell.execute_reply": "2023-08-18T07:00:45.237843Z"
},
"origin_pos": 10,
"tab": [
"pytorch"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"561 728\n"
]
},
{
"data": {
"text/plain": [
"torch.Size([1, 2042040, 4])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"img = d2l.plt.imread('../img/catdog.jpg')\n",
"h, w = img.shape[:2]\n",
"\n",
"print(h, w)\n",
"X = torch.rand(size=(1, 3, h, w))\n",
"Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])\n",
"Y.shape"
]
},
{
"cell_type": "markdown",
"id": "3ebaca32",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"访问以(250,250)为中心的第一个锚框"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a7b7cfa3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T07:00:45.244522Z",
"iopub.status.busy": "2023-08-18T07:00:45.243982Z",
"iopub.status.idle": "2023-08-18T07:00:45.252916Z",
"shell.execute_reply": "2023-08-18T07:00:45.251985Z"
},
"origin_pos": 13,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([0.06, 0.07, 0.63, 0.82])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boxes = Y.reshape(h, w, 5, 4)\n",
"boxes[250, 250, 0, :]"
]
},
{
"cell_type": "markdown",
"id": "afa57c6b",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"显示以图像中以某个像素为中心的所有锚框"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c199e557",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T07:00:45.272415Z",
"iopub.status.busy": "2023-08-18T07:00:45.271753Z",
"iopub.status.idle": "2023-08-18T07:00:45.634073Z",
"shell.execute_reply": "2023-08-18T07:00:45.632866Z"
},
"origin_pos": 18,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"