Last active
October 27, 2025 09:44
-
-
Save hathibelagal-dev/6f2d35a1bf01b2222a0e57cf5c493416 to your computer and use it in GitHub Desktop.
deepseekocr.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyN1b8Rb+9emWJvuMAK42Lc3", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/hathibelagal-dev/6f2d35a1bf01b2222a0e57cf5c493416/deepseekocr.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Optional\n", | |
| "This cell enables word-wrapping for the output of the OCR process." | |
| ], | |
| "metadata": { | |
| "id": "LJVecLhurpri" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from IPython.display import HTML, display\n", | |
| "\n", | |
| "def set_css():\n", | |
| " display(HTML('''\n", | |
| " <style>\n", | |
| " pre {\n", | |
| " white-space: pre-wrap;\n", | |
| " }\n", | |
| " </style>\n", | |
| " '''))\n", | |
| "get_ipython().events.register('pre_run_cell', set_css)" | |
| ], | |
| "metadata": { | |
| "id": "H01_mrj5EImo" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Setup" | |
| ], | |
| "metadata": { | |
| "id": "x6gOGtW0mEwm" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!pip install --no-deps -q bitsandbytes" | |
| ], | |
| "metadata": { | |
| "id": "axaHlCqp8vMO" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!pip install addict transformers==4.46.3 tokenizers==0.20.3 pdf2image" | |
| ], | |
| "metadata": { | |
| "id": "lRq1Gp3R5veF" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!apt install poppler-utils" | |
| ], | |
| "metadata": { | |
| "id": "eNwbNdtIlKQX" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!mkdir -p /content/outputs /content/pdf_pages/" | |
| ], | |
| "metadata": { | |
| "id": "tHhuMouI6tH7" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### PDF to Images" | |
| ], | |
| "metadata": { | |
| "id": "nTeG5pyemIf7" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from pdf2image import convert_from_path\n", | |
| "\n", | |
| "pdf_file = 'test.pdf'\n", | |
| "images = convert_from_path(pdf_file)\n", | |
| "\n", | |
| "for i, image in enumerate(images):\n", | |
| " image.save(f'/content/pdf_pages/page_{i+1}.jpg', 'JPEG')" | |
| ], | |
| "metadata": { | |
| "id": "fg65Tr_kkBdw" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from IPython.display import Image as _I\n", | |
| "_I(\"/content/pdf_pages/page_1.jpg\", width=640)" | |
| ], | |
| "metadata": { | |
| "id": "2OTEhoXyBbTN" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### OCR" | |
| ], | |
| "metadata": { | |
| "id": "X5ETqD2OmMHM" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "4urzCKTC5ArJ" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig\n", | |
| "import torch\n", | |
| "\n", | |
| "model_name = 'deepseek-ai/DeepSeek-OCR'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "qc = BitsAndBytesConfig(\n", | |
| " load_in_4bit=True,\n", | |
| " bnb_4bit_use_double_quant=True,\n", | |
| " bnb_4bit_quant_type=\"nf4\",\n", | |
| " bnb_4bit_compute_dtype=torch.float\n", | |
| ")" | |
| ], | |
| "metadata": { | |
| "id": "2Qf2ctNZ82Ic" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", | |
| "model = AutoModel.from_pretrained(\n", | |
| " model_name, trust_remote_code=True,\n", | |
| " use_safetensors=True, device_map=\"auto\",\n", | |
| " quantization_config=qc, torch_dtype=torch.float\n", | |
| ")\n", | |
| "model = model.eval()" | |
| ], | |
| "metadata": { | |
| "id": "z45-aaPY5F0u" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "page_number = 5 #@param {type:\"integer\"}\n", | |
| "\n", | |
| "prompt = \"<image>\\nParse the figure.\"\n", | |
| "image_file = f'/content/pdf_pages/page_{page_number}.jpg'\n", | |
| "output_path = f'/content/outputs/page_{page_number}'" | |
| ], | |
| "metadata": { | |
| "id": "HijaQwdYmiR7" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%%time\n", | |
| "model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 1024, crop_mode=False, save_results = True, test_compress = True)" | |
| ], | |
| "metadata": { | |
| "id": "gHpzukbw8g7t" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
iqiancheng
commented
Oct 27, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment