Turn your iPhone into a powerful local OCR server using Apple's Vision Framework. No cloud dependencies, unlimited usage, complete privacy.
English | 日本語 | 繁體中文 | 简体中文 | 한국어 | Français
- Launch the app and the server will start automatically
- Access the displayed IP address from any device on the same network
- Upload images to get text recognition results
- Integrate the service into your applications via API
- To ensure the app runs continuously without interruption, please enable iOS Guided Access mode and keep the screen on
- OCR Test: On your computer, open a web browser and navigate to the IP address displayed by the app to perform an OCR test.
-
API Example - Upload an image via the `upload` API:
curl -H "Accept: application/json" \
  -X POST http://<YOUR IP>:8000/upload \
  -F "file=@01.png"
-
Python Upload Example:
import requests

url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
file_path = "01.png"

# Send the image as a multipart upload under the "file" field and ask the
# server to answer with JSON.
with open(file_path, "rb") as f:
    files = {"file": f}
    headers = {"Accept": "application/json"}
    # requests has no default timeout; without one the script would block
    # indefinitely if the device is unreachable.
    response = requests.post(url, files=files, headers=headers, timeout=60)

print("status code:", response.status_code)
print("response:", response.text)
-
The JSON response looks like this:
{
  "success": true,
  "message": "File uploaded successfully",
  "ocr_result": "Hello\nWorld",
  "image_width": 1247,
  "image_height": 648,
  "ocr_boxes": [
    {
      "text": "Hello",
      "x": 434.7201472051599,
      "y": 269.3123034733379,
      "w": 216.30970547749456,
      "h": 69.04344177246088
    },
    {
      "text": "World",
      "x": 429.5100030105896,
      "y": 420.4043957924413,
      "w": 242.85499225518635,
      "h": 73.382080078125
    }
  ]
}
`image_width` and `image_height` represent the width and height of the image (in px); `x` and `y` represent the top-left origin of the text bounding box (in px); `w` and `h` represent the width and height of the text bounding box (in px).
-
Python Example – Drawing text bounding boxes using `ocr_boxes`
# Draw text bounding boxes on the image using the ocr_boxes information:
#
# pip3 install requests pillow opencv-python
#
import functools
import os
import sys

import cv2
import numpy as np
import requests
from PIL import Image, ImageDraw, ImageFont

url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
file_path = "01.png"

# Candidate fonts (supporting Chinese and English), tried in this order.
FONT_CANDIDATES = (
    # macOS
    "/System/Library/Fonts/PingFang.ttc",
    "/System/Library/Fonts/STHeiti Light.ttc",
    # Windows
    r"C:\Windows\Fonts\msyh.ttc",
    r"C:\Windows\Fonts\msjh.ttc",
    r"C:\Windows\Fonts\arialuni.ttf",
    # Noto
    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
)


@functools.lru_cache(maxsize=64)
def _load_font(size: int):
    """Return the first loadable candidate font at *size*, else PIL's default.

    Cached per size so the filesystem is not probed again for every box.
    """
    for path in FONT_CANDIDATES:
        if not os.path.exists(path):
            continue
        try:
            return ImageFont.truetype(path, size=size)
        except (OSError, ImportError):
            # Unreadable font file or missing FreeType support: this is a
            # best-effort lookup, so fall through to the next candidate.
            continue
    return ImageFont.load_default()


# ===== Select font (supports Chinese and English), font size auto-scales with box height =====
def pick_font(box_h_px: float):
    """Pick a label font sized relative to the height of a text box.

    Args:
        box_h_px: Height of the OCR bounding box in pixels.

    Returns:
        A PIL font object: the first available entry of ``FONT_CANDIDATES``,
        or PIL's built-in default font if none can be loaded.
    """
    # Small font size = 25% of box height (minimum 10)
    size = max(10, int(box_h_px * 0.25))
    return _load_font(size)


# ===== Draw box and small text =====
def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image:
    """Draw a red rectangle and a small text label for every OCR box.

    Args:
        img_pil: Image to draw on; it is modified in place.
        boxes: Iterable of mappings with ``x``, ``y``, ``w``, ``h`` and an
            optional ``text`` key. Entries that cannot be parsed are skipped.
        line_thickness: Outline width of the rectangles in pixels.

    Returns:
        The same ``img_pil`` object, for convenient chaining.
    """
    draw = ImageDraw.Draw(img_pil)
    for box in boxes:
        try:
            x, y = float(box["x"]), float(box["y"])
            w, h = float(box["w"]), float(box["h"])
            text = str(box.get("text", ""))
        except (AttributeError, KeyError, TypeError, ValueError):
            # Malformed entry (wrong type, missing key, non-numeric value).
            continue

        # Red bounding box
        x2, y2 = x + w, y + h
        draw.rectangle([x, y, x2, y2], outline=(255, 0, 0), width=line_thickness)

        # Top-right label
        font = pick_font(h)
        # textbbox returns (left, top, right, bottom) relative to the anchor
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        tw, th = right - left, bottom - top
        pad = max(2, int(h * 0.06))

        # Align label to top-right, not exceeding box or image edge
        tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad)))
        ty = int(max(0, min(y + pad, img_pil.height - th - pad)))

        # White background
        draw.rectangle(
            [tx - pad, ty - pad, tx + tw + pad, ty + th + pad],
            fill=(255, 255, 255),
        )
        # The glyphs start at (left, top) from the anchor, not at the anchor
        # itself; shift the anchor so the ink lands exactly on the background.
        draw.text((tx - left, ty - top), text, font=font, fill=(20, 20, 20))
    return img_pil


def main():
    """Upload ``file_path`` to ``url``, validate the reply and preview the boxes.

    Exits with a distinct status code for each failure: 1 image missing,
    2 request failed, 3 non-200 status, 4 non-JSON body, 5 server-reported
    failure.
    """
    if not os.path.exists(file_path):
        print(f"[ERROR] Image not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    # 1) Upload
    with open(file_path, "rb") as f:
        files = {"file": f}
        headers = {"Accept": "application/json"}
        try:
            response = requests.post(url, files=files, headers=headers, timeout=60)
        except requests.RequestException as e:
            print(f"[ERROR] Request failed: {e}", file=sys.stderr)
            sys.exit(2)

    print("status code:", response.status_code)

    # 2) Check HTTP and JSON
    if response.status_code != 200:
        print("response:", response.text[:500])
        sys.exit(3)

    try:
        data = response.json()
    except ValueError:
        print("[ERROR] Not JSON response")
        print("response:", response.text[:500])
        sys.exit(4)

    if not data.get("success", False):
        print("[ERROR] Server returned failure:", data)
        sys.exit(5)

    print("response ok")

    # 3) Load original image (using PIL); the context manager closes the file
    #    handle once the RGB copy has been made.
    with Image.open(file_path) as src:
        img_pil = src.convert("RGB")

    # If server returns different dimensions (should usually match), use server dimensions
    W = int(data.get("image_width", img_pil.width))
    H = int(data.get("image_height", img_pil.height))
    if (W, H) != (img_pil.width, img_pil.height):
        img_pil = img_pil.resize((W, H), Image.BICUBIC)

    boxes = data.get("ocr_boxes", [])
    img_pil = draw_boxes(img_pil, boxes)

    # 4) Display
    img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    cv2.imshow("OCR Preview", img_cv)
    print("Press any key on the image window to exit...")
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
Sample Output:
- High-precision OCR powered by Apple’s Vision Framework
- Supports multiple languages with automatic detection
- Upload via web interface and receive OCR results within seconds
- JSON API for easy integration into apps
- 100% local processing, no cloud, full privacy
- Local OCR without cloud services
- Share OCR services across devices in the same network
- Build an OCR processing cluster using multiple iPhones