OCR Server

Turn your iPhone into a powerful local OCR server using Apple's Vision Framework. No cloud dependencies, unlimited usage, complete privacy.

Download from the App Store

How to Use

Launch the app and the server will start automatically
Access the displayed IP address from any device on the same network
Upload images to get text recognition results
Integrate the service into your applications via API
To ensure the app runs continuously without interruption, please enable iOS Guided Access mode and keep the screen on

OCR Test: On your computer, open a web browser and navigate to the IP address displayed by the app to perform an OCR test.

API Example - Upload an image via upload API:

curl -H "Accept: application/json" \
  -X POST http://<YOUR IP>:8000/upload \
  -F "file=@01.png"

Python Upload Example:

import requests

url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
file_path = "01.png"

# Send the image as multipart/form-data under the "file" field. The context
# manager closes the file even if the request raises.
with open(file_path, "rb") as f:
    files = {"file": f}
    headers = {"Accept": "application/json"}
    # requests applies no timeout by default; without one, an unreachable
    # server would make this call block indefinitely.
    response = requests.post(url, files=files, headers=headers, timeout=60)

print("status code:", response.status_code)
print("response:", response.text)

The JSON response looks like this:

{
  "success": true,
  "message": "File uploaded successfully",
  "ocr_result": "Hello\nWorld",
  "image_width": 1247,
  "image_height": 648,
  "ocr_boxes": [
    {
      "text": "Hello",
      "x": 434.7201472051599,
      "y": 269.3123034733379,
      "w": 216.30970547749456,
      "h": 69.04344177246088
    },
    {
      "text": "World",
      "x": 429.5100030105896,
      "y": 420.4043957924413,
      "w": 242.85499225518635,
      "h": 73.382080078125
    }
  ]
}

image_width and image_height are the width and height of the image (in px). x and y are the coordinates of the top-left corner of the text bounding box (in px), and w and h are the width and height of that bounding box (in px).

Python Example – Drawing text bounding boxes using ocr_boxes information:

#
# pip3 install requests pillow opencv-python
#

import os
import sys
import requests
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import cv2

# Upload endpoint of the OCR server that main() posts the image to.
url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
# Image that main() uploads and then annotates with the returned boxes.
file_path = "01.png"

# ===== Select font (supports Chinese and English), font size auto-scales with box height =====
def pick_font(box_h_px: float):
    """Return a Pillow font scaled to the height of a text bounding box.

    The font size is 25% of ``box_h_px`` (truncated to an integer) with a
    minimum of 10. The candidate font files below are tried in order and the
    first one that exists and loads is returned; if none can be used,
    Pillow's built-in default font is returned instead.
    """
    font_size = max(10, int(box_h_px * 0.25))
    search_paths = (
        # macOS
        "/System/Library/Fonts/PingFang.ttc",
        "/System/Library/Fonts/STHeiti Light.ttc",
        # Windows
        r"C:\Windows\Fonts\msyh.ttc",
        r"C:\Windows\Fonts\msjh.ttc",
        r"C:\Windows\Fonts\arialuni.ttf",
        # Noto
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    )
    for candidate in search_paths:
        if not os.path.exists(candidate):
            continue
        try:
            return ImageFont.truetype(candidate, size=font_size)
        except Exception:
            # Best effort: a font file that fails to load is skipped so the
            # next candidate (or the default font) can be used.
            continue
    return ImageFont.load_default()

# ===== Draw box and small text =====
def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image:
    """Draw every OCR box and its recognized text onto ``img_pil``.

    Args:
        img_pil: Image to annotate; it is drawn on in place.
        boxes: Iterable of mappings with numeric ``x``, ``y``, ``w``, ``h``
            entries and an optional ``text`` entry. Entries that are
            missing a coordinate or hold non-numeric values are skipped.
        line_thickness: Outline width of the red rectangle.

    Returns:
        The same ``img_pil`` object that was passed in.
    """
    draw = ImageDraw.Draw(img_pil)
    for box in boxes:
        try:
            x, y = float(box["x"]), float(box["y"])
            w, h = float(box["w"]), float(box["h"])
            text = str(box.get("text", ""))
        except (KeyError, TypeError, ValueError, OverflowError, AttributeError):
            # Malformed entry (not a mapping, missing key, non-numeric value).
            continue

        # Order the corners so (x1, y1) is the top-left one even when w or h
        # is negative; recent Pillow releases raise ValueError for a
        # rectangle whose second corner precedes the first.
        x1, x2 = sorted((x, x + w))
        y1, y2 = sorted((y, y + h))
        box_h = y2 - y1

        # Red bounding box
        draw.rectangle([x1, y1, x2, y2], outline=(255, 0, 0), width=line_thickness)

        # Top-right label
        font = pick_font(box_h)
        # textbbox returns (left, top, right, bottom) relative to the anchor;
        # left/top are usually non-zero for TrueType fonts.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        tw, th = right - left, bottom - top
        pad = max(2, int(box_h * 0.06))

        # Align label to top-right, not exceeding box or image edge
        tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad)))
        ty = int(max(0, min(y1 + pad, img_pil.height - th - pad)))

        # White background
        draw.rectangle([tx - pad, ty - pad, tx + tw + pad, ty + th + pad], fill=(255, 255, 255))
        # Shift the anchor by the bbox offsets so the glyphs land exactly
        # inside the white patch instead of below/right of it.
        draw.text((tx - left, ty - top), text, font=font, fill=(20, 20, 20))
    return img_pil

def main():
    """Upload ``file_path`` to the OCR server and preview the returned boxes.

    The process exits with a distinct status on each failure:
    1 = image file not found, 2 = HTTP request failed, 3 = non-200 status,
    4 = response body is not a JSON object, 5 = server reported failure.
    All error diagnostics are written to stderr.
    """
    if not os.path.exists(file_path):
        print(f"[ERROR] Image not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    # 1) Upload
    with open(file_path, "rb") as f:
        files = {"file": f}
        headers = {"Accept": "application/json"}
        try:
            response = requests.post(url, files=files, headers=headers, timeout=60)
        except requests.RequestException as e:
            print(f"[ERROR] Request failed: {e}", file=sys.stderr)
            sys.exit(2)

    print("status code:", response.status_code)

    # 2) Check HTTP and JSON
    if response.status_code != 200:
        print("response:", response.text[:500], file=sys.stderr)
        sys.exit(3)

    try:
        data = response.json()
    except ValueError:
        print("[ERROR] Not JSON response", file=sys.stderr)
        print("response:", response.text[:500], file=sys.stderr)
        sys.exit(4)

    # Valid JSON that is not an object (e.g. a list) has no .get(); reject it
    # here instead of crashing with AttributeError below.
    if not isinstance(data, dict):
        print("[ERROR] Unexpected JSON response:", data, file=sys.stderr)
        sys.exit(4)

    if not data.get("success", False):
        print("[ERROR] Server returned failure:", data, file=sys.stderr)
        sys.exit(5)

    print("response ok")

    # 3) Load original image (using PIL); the context manager releases the
    # file handle once the RGB copy has been made.
    with Image.open(file_path) as src:
        img_pil = src.convert("RGB")

    # If server returns different dimensions (should usually match), use server dimensions.
    # "or" also covers a null/zero value, which int() or resize() would reject.
    W = int(data.get("image_width") or img_pil.width)
    H = int(data.get("image_height") or img_pil.height)
    if (W, H) != (img_pil.width, img_pil.height):
        img_pil = img_pil.resize((W, H), Image.BICUBIC)

    boxes = data.get("ocr_boxes") or []
    img_pil = draw_boxes(img_pil, boxes)

    # 4) Display (OpenCV expects BGR channel order, PIL provides RGB)
    img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    cv2.imshow("OCR Preview", img_cv)
    print("Press any key on the image window to exit...")
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Sample Output:

Features

High-precision OCR powered by Apple’s Vision Framework
Supports multiple languages with automatic detection
Upload via web interface and receive OCR results within seconds
JSON API for easy integration into apps
100% local processing, no cloud, full privacy

Use Cases

Local OCR without cloud services
Share OCR services across devices in the same network
Build an OCR processing cluster using multiple iPhones

Name		Name	Last commit message	Last commit date
Latest commit History 91 Commits
OcrServer.xcodeproj		OcrServer.xcodeproj
OcrServer		OcrServer
.gitattributes		.gitattributes
.gitignore		.gitignore
LICENSE		LICENSE
README.fr.md		README.fr.md
README.ja.md		README.ja.md
README.ko.md		README.ko.md
README.md		README.md
README.zh-CN.md		README.zh-CN.md
README.zh-TW.md		README.zh-TW.md
image.jpg		image.jpg
image2.png		image2.png
image3.png		image3.png

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Repository files navigation

OCR Server

How to Use

Features

Use Cases

About

Uh oh!

Releases 8

Packages

Languages

License

riddleling/iOS-OCR-Server

Folders and files

Latest commit

History

Repository files navigation

OCR Server

How to Use

Features

Use Cases

About

Topics

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases 8

Packages 0

Languages

Packages