nan-wang · December 25, 2024 01:02
diff --git a/pixelshuffling.ipynb b/pixelshuffling.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/nan-wang/5a01f5b595682e815402114e72bf1211/pixelshuffling.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch"
      ],
      "metadata": {
        "id": "h7IG0Z2JsrIR"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Reference implementation (InternVL):\n",
        "# https://github.com/OpenGVLab/InternVL/blob/869d3be88d40d79162ca23b1ff5380d657883b55/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py#L252\n",
        "def pixel_shuffle(x, scale_factor=0.5):\n",
        "    \"\"\"Trade spatial resolution for channel depth and flatten to a token sequence.\n",
        "\n",
        "    With the default ``scale_factor=0.5`` every 2x2 block of spatial positions\n",
        "    is merged into one token whose channel vector is the concatenation of the\n",
        "    four original channel vectors.\n",
        "\n",
        "    Args:\n",
        "        x: Tensor laid out as (N, W, H, C).\n",
        "        scale_factor: Spatial scale. ``W * scale_factor``, ``H * scale_factor``\n",
        "            and ``C / scale_factor ** 2`` are truncated with ``int()``, so they\n",
        "            should be whole numbers for the ``view`` calls to succeed.\n",
        "\n",
        "    Returns:\n",
        "        Tensor of shape (N, W * scale * H * scale, C / scale ** 2).\n",
        "    \"\"\"\n",
        "    n, w, h, c = x.size()\n",
        "    # N, W, H, C --> N, W, H * scale, C // scale\n",
        "    x = x.view(n, w, int(h * scale_factor), int(c / scale_factor))\n",
        "    # N, W, H * scale, C // scale --> N, H * scale, W, C // scale\n",
        "    x = x.permute(0, 2, 1, 3).contiguous()\n",
        "    # N, H * scale, W, C // scale --> N, H * scale, W * scale, C // (scale ** 2)\n",
        "    x = x.view(n, int(h * scale_factor), int(w * scale_factor),\n",
        "               int(c / (scale_factor * scale_factor)))\n",
        "    # N, H * scale, W * scale, C' --> N, W * scale, H * scale, C'\n",
        "    x = x.permute(0, 2, 1, 3).contiguous()\n",
        "    # Flatten the spatial grid into a token axis. Take the channel size from\n",
        "    # the tensor itself instead of hard-coding it, so inputs with any channel\n",
        "    # count keep one grid position per token.\n",
        "    return x.reshape(n, -1, x.size(-1))"
      ],
      "metadata": {
        "id": "b35OP1ynseIu"
      },
      "execution_count": 129,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Value encoding: every entry is a 4-digit number B W H C where\n",
        "#   B = id of the 2x2 spatial block the pixel belongs to (1-4),\n",
        "#   W = width index + 1, H = height index + 1, C = channel index + 1.\n",
        "# The tensor is laid out as (N, W, H, C), matching pixel_shuffle's input.\n",
        "batch_size = 1\n",
        "width = 4\n",
        "height = 4\n",
        "channels = 8\n",
        "fake_image_tensor = torch.tensor(\n",
        "    [[\n",
        "        [\n",
        "            [1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118],\n",
        "            [1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128],\n",
        "            [2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138],\n",
        "            [2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148],\n",
        "        ],\n",
        "        [\n",
        "            [1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218],\n",
        "            [1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228],\n",
        "            [2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238],\n",
        "            [2241, 2242, 2243, 2244, 2245, 2246, 2247, 2248],\n",
        "        ],\n",
        "        [\n",
        "            [3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318],\n",
        "            [3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328],\n",
        "            [4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338],\n",
        "            [4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348],\n",
        "        ],\n",
        "        [\n",
        "            [3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418],\n",
        "            [3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428],\n",
        "            [4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438],\n",
        "            [4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448],\n",
        "        ],\n",
        "    ]])\n",
        "# Tie the declared dimensions to the literal so they cannot drift apart.\n",
        "assert fake_image_tensor.shape == (batch_size, width, height, channels)"
      ],
      "metadata": {
        "id": "Goi7juWUsmZJ"
      },
      "execution_count": 130,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "fake_image_tensor.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bySaHRvhcc9u",
        "outputId": "93cdd1cd-46a6-4173-cf69-f93c55bce1f7"
      },
      "execution_count": 55,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([1, 4, 4, 8])"
            ]
          },
          "metadata": {},
          "execution_count": 55
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Pixel Shuffling on Image"
      ],
      "metadata": {
        "id": "Ia_AGIwevQQM"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "shuffled_image = pixel_shuffle(fake_image_tensor, 0.5) # 0.5 == unshuffle"
      ],
      "metadata": {
        "id": "TI1-ZhxDs0NZ"
      },
      "execution_count": 131,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "shuffled_image.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ycutWscYs6i7",
        "outputId": "130b5833-fc5e-477e-ace1-d2fedb048117"
      },
      "execution_count": 132,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([1, 4, 32])"
            ]
          },
          "metadata": {},
          "execution_count": 132
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "shuffled_image[:, :, 24]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8Zx1A7B1xim_",
        "outputId": "822f1e16-7253-462b-8a22-3539bc0a1807"
      },
      "execution_count": 120,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[1221, 2241, 3421, 4441]])"
            ]
          },
          "metadata": {},
          "execution_count": 120
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "shuffled_image[:, :, 8]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mYKnYgS95yK6",
        "outputId": "5fa7f1d6-1e21-4377-cc7f-700950b08bbc"
      },
      "execution_count": 135,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[1121, 2141, 3321, 4341]])"
            ]
          },
          "metadata": {},
          "execution_count": 135
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "fake_image_tensor[0, :, :, 0].T"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "lfX62WZbxyVs",
        "outputId": "845635e7-5b21-4cf7-ca5c-fe452f37603d"
      },
      "execution_count": 78,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[1111, 1211, 3311, 3411],\n",
              "        [1121, 1221, 3321, 3421],\n",
              "        [2131, 2231, 4331, 4431],\n",
              "        [2141, 2241, 4341, 4441]])"
            ]
          },
          "metadata": {},
          "execution_count": 78
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "fake_image_tensor[0, 2, :, :]  #"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9SG0Mvrt2Q0R",
        "outputId": "d837e726-7e14-49dd-9e3b-fa650ea4177c"
      },
      "execution_count": 80,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "tensor([[3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318],\n",
              "        [3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328],\n",
              "        [4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338],\n",
              "        [4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348]])"
            ]
          },
          "metadata": {},
          "execution_count": 80
        }
      ]
    },
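    {
      "cell_type": "markdown",
      "source": [
        "**Colour legend** - the leading digit of every value in `fake_image_tensor` identifies the 2x2 spatial block the pixel belongs to; `pixel_shuffle(..., 0.5)` merges each block into one token:\n",
        "\n",
        "- 1 = \"dark red\"\n",
        "- 2 = \"dark blue\"\n",
        "- 3 = \"light red\"\n",
        "- 4 = \"light blue\"\n"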
      ],
      "metadata": {
        "id": "Fs50ELwoAZW5"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "shuffled_image.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dRpnug1RtHl6",
        "outputId": "658f6416-4db8-43ac-b04d-f4c437974bb4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([1, 4, 32])"
            ]
          },
          "metadata": {},
          "execution_count": 109
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/nan-wang/5a01f5b595682e815402114e72bf1211/pixelshuffling.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"import torch"
	],
	"metadata": {
	"id": "h7IG0Z2JsrIR"
	},
	"execution_count": 1,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Reference implementation (InternVL):\n",
	"# https://github.com/OpenGVLab/InternVL/blob/869d3be88d40d79162ca23b1ff5380d657883b55/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py#L252\n",
	"def pixel_shuffle(x, scale_factor=0.5):\n",
	"    \"\"\"Trade spatial resolution for channel depth and flatten to a token sequence.\n",
	"\n",
	"    With the default ``scale_factor=0.5`` every 2x2 block of spatial positions\n",
	"    is merged into one token whose channel vector is the concatenation of the\n",
	"    four original channel vectors.\n",
	"\n",
	"    Args:\n",
	"        x: Tensor laid out as (N, W, H, C).\n",
	"        scale_factor: Spatial scale. ``W * scale_factor``, ``H * scale_factor``\n",
	"            and ``C / scale_factor ** 2`` are truncated with ``int()``, so they\n",
	"            should be whole numbers for the ``view`` calls to succeed.\n",
	"\n",
	"    Returns:\n",
	"        Tensor of shape (N, W * scale * H * scale, C / scale ** 2).\n",
	"    \"\"\"\n",
	"    n, w, h, c = x.size()\n",
	"    # N, W, H, C --> N, W, H * scale, C // scale\n",
	"    x = x.view(n, w, int(h * scale_factor), int(c / scale_factor))\n",
	"    # N, W, H * scale, C // scale --> N, H * scale, W, C // scale\n",
	"    x = x.permute(0, 2, 1, 3).contiguous()\n",
	"    # N, H * scale, W, C // scale --> N, H * scale, W * scale, C // (scale ** 2)\n",
	"    x = x.view(n, int(h * scale_factor), int(w * scale_factor),\n",
	"               int(c / (scale_factor * scale_factor)))\n",
	"    # N, H * scale, W * scale, C' --> N, W * scale, H * scale, C'\n",
	"    x = x.permute(0, 2, 1, 3).contiguous()\n",
	"    # Flatten the spatial grid into a token axis. Take the channel size from\n",
	"    # the tensor itself instead of hard-coding it, so inputs with any channel\n",
	"    # count keep one grid position per token.\n",
	"    return x.reshape(n, -1, x.size(-1))"
	],
	"metadata": {
	"id": "b35OP1ynseIu"
	},
	"execution_count": 129,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Value encoding: every entry is a 4-digit number B W H C where\n",
	"#   B = id of the 2x2 spatial block the pixel belongs to (1-4),\n",
	"#   W = width index + 1, H = height index + 1, C = channel index + 1.\n",
	"# The tensor is laid out as (N, W, H, C), matching pixel_shuffle's input.\n",
	"batch_size = 1\n",
	"width = 4\n",
	"height = 4\n",
	"channels = 8\n",
	"fake_image_tensor = torch.tensor(\n",
	"    [[\n",
	"        [\n",
	"            [1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118],\n",
	"            [1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128],\n",
	"            [2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138],\n",
	"            [2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148],\n",
	"        ],\n",
	"        [\n",
	"            [1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218],\n",
	"            [1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228],\n",
	"            [2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238],\n",
	"            [2241, 2242, 2243, 2244, 2245, 2246, 2247, 2248],\n",
	"        ],\n",
	"        [\n",
	"            [3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318],\n",
	"            [3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328],\n",
	"            [4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338],\n",
	"            [4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348],\n",
	"        ],\n",
	"        [\n",
	"            [3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418],\n",
	"            [3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428],\n",
	"            [4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438],\n",
	"            [4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448],\n",
	"        ],\n",
	"    ]])\n",
	"# Tie the declared dimensions to the literal so they cannot drift apart.\n",
	"assert fake_image_tensor.shape == (batch_size, width, height, channels)"
	],
	"metadata": {
	"id": "Goi7juWUsmZJ"
	},
	"execution_count": 130,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"fake_image_tensor.shape"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "bySaHRvhcc9u",
	"outputId": "93cdd1cd-46a6-4173-cf69-f93c55bce1f7"
	},
	"execution_count": 55,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"torch.Size([1, 4, 4, 8])"
	]
	},
	"metadata": {},
	"execution_count": 55
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Pixel Shuffling on Image"
	],
	"metadata": {
	"id": "Ia_AGIwevQQM"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"shuffled_image = pixel_shuffle(fake_image_tensor, 0.5) # 0.5 == unshuffle"
	],
	"metadata": {
	"id": "TI1-ZhxDs0NZ"
	},
	"execution_count": 131,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"shuffled_image.shape"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "ycutWscYs6i7",
	"outputId": "130b5833-fc5e-477e-ace1-d2fedb048117"
	},
	"execution_count": 132,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"torch.Size([1, 4, 32])"
	]
	},
	"metadata": {},
	"execution_count": 132
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"shuffled_image[:, :, 24]"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "8Zx1A7B1xim_",
	"outputId": "822f1e16-7253-462b-8a22-3539bc0a1807"
	},
	"execution_count": 120,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"tensor([[1221, 2241, 3421, 4441]])"
	]
	},
	"metadata": {},
	"execution_count": 120
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"shuffled_image[:, :, 8]"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "mYKnYgS95yK6",
	"outputId": "5fa7f1d6-1e21-4377-cc7f-700950b08bbc"
	},
	"execution_count": 135,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"tensor([[1121, 2141, 3321, 4341]])"
	]
	},
	"metadata": {},
	"execution_count": 135
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"fake_image_tensor[0, :, :, 0].T"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "lfX62WZbxyVs",
	"outputId": "845635e7-5b21-4cf7-ca5c-fe452f37603d"
	},
	"execution_count": 78,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"tensor([[1111, 1211, 3311, 3411],\n",
	" [1121, 1221, 3321, 3421],\n",
	" [2131, 2231, 4331, 4431],\n",
	" [2141, 2241, 4341, 4441]])"
	]
	},
	"metadata": {},
	"execution_count": 78
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"fake_image_tensor[0, 2, :, :] #"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "9SG0Mvrt2Q0R",
	"outputId": "d837e726-7e14-49dd-9e3b-fa650ea4177c"
	},
	"execution_count": 80,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"tensor([[3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318],\n",
	" [3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328],\n",
	" [4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338],\n",
	" [4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348]])"
	]
	},
	"metadata": {},
	"execution_count": 80
	}
	]
	},
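	{
	"cell_type": "markdown",
	"source": [
	"**Colour legend** - the leading digit of every value in `fake_image_tensor` identifies the 2x2 spatial block the pixel belongs to; `pixel_shuffle(..., 0.5)` merges each block into one token:\n",
	"\n",
	"- 1 = \"dark red\"\n",
	"- 2 = \"dark blue\"\n",
	"- 3 = \"light red\"\n",
	"- 4 = \"light blue\"\n"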
	],
	"metadata": {
	"id": "Fs50ELwoAZW5"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"shuffled_image.shape"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "dRpnug1RtHl6",
	"outputId": "658f6416-4db8-43ac-b04d-f4c437974bb4"
	},
	"execution_count": null,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"torch.Size([1, 4, 32])"
	]
	},
	"metadata": {},
	"execution_count": 109
	}
	]
	}
	]
	}