From b99b80c8818538e21ddc72f785f9cfa96535aa86 Mon Sep 17 00:00:00 2001
From: openhands
Date: Tue, 8 Apr 2025 12:23:26 +0000
Subject: [PATCH] Add Docker support and Streamlit web interface

---
 Dockerfile         |  27 +++++++
 README.md          |  50 ++++++++++---
 app.py             | 182 +++++++++++++++++++++++++++++++++++++++++++++
 docker-compose.yml |  15 ++++
 requirements.txt   |   1 +
 5 files changed, 264 insertions(+), 11 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 app.py
 create mode 100644 docker-compose.yml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..88592a2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+# Copy requirements first for better caching
+COPY requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application
+COPY . .
+
+# Create necessary directories
+RUN mkdir -p logs output
+
+# Expose the Streamlit port
+EXPOSE 8501
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Do not COPY .env explicitly: secrets would persist in an image layer and the
+# build would fail without the file. Compose injects it at runtime (env_file).
+
+# Command to run the Streamlit app
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 05830c4..453786f 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,29 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
 
 ## 🚀 Getting Started
 
+### Option 1: Using Docker (Recommended)
+
+1. Clone this repository
+
+2. Configure your environment variables in the `.env` file:
+   ```bash
+   # Copy the sample .env file
+   cp .env.sample .env
+
+   # Edit the .env file with your credentials
+   # GEMINI_PROJECT_ID=your-project-id
+   # GITHUB_TOKEN=your-github-token
+   ```
+
+3. Run the application using Docker Compose:
+   ```bash
+   docker-compose up -d
+   ```
+
+4. Access the Streamlit web interface at http://localhost:8501
+
+### Option 2: Manual Installation
+
 1. Clone this repository
 
 2. Install dependencies:
@@ -75,17 +98,22 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
    python utils/call_llm.py
    ```
 
-4. Generate a complete codebase tutorial by running the main script:
-   ```bash
-   python main.py https://github.com/username/repo --include "*.py" "*.js" --exclude "tests/*" --max-size 50000
-   ```
-   - `repo_url` - URL of the GitHub repository (required)
-   - `-n, --name` - Project name (optional, derived from URL if omitted)
-   - `-t, --token` - GitHub token (or set GITHUB_TOKEN environment variable)
-   - `-o, --output` - Output directory (default: ./output)
-   - `-i, --include` - Files to include (e.g., "*.py" "*.js")
-   - `-e, --exclude` - Files to exclude (e.g., "tests/*" "docs/*")
-   - `-s, --max-size` - Maximum file size in bytes (default: 100KB)
+4. Run the Streamlit web interface:
+   ```bash
+   streamlit run app.py
+   ```
+
+   Or generate a complete codebase tutorial directly using the command line:
+   ```bash
+   python main.py https://github.com/username/repo --include "*.py" "*.js" --exclude "tests/*" --max-size 50000
+   ```
+   - `repo_url` - URL of the GitHub repository (required)
+   - `-n, --name` - Project name (optional, derived from URL if omitted)
+   - `-t, --token` - GitHub token (or set GITHUB_TOKEN environment variable)
+   - `-o, --output` - Output directory (default: ./output)
+   - `-i, --include` - Files to include (e.g., "*.py" "*.js")
+   - `-e, --exclude` - Files to exclude (e.g., "tests/*" "docs/*")
+   - `-s, --max-size` - Maximum file size in bytes (default: 100KB)
 
 The application will crawl the repository, analyze the codebase structure, generate tutorial content, and save the output in the specified directory (default: ./output).
 
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..c7aca22
--- /dev/null
+++ b/app.py
@@ -0,0 +1,182 @@
+"""Streamlit web interface for the codebase tutorial generator."""
+
+import os
+
+import dotenv
+import streamlit as st
+
+from flow import create_tutorial_flow
+
+# Load environment variables
+dotenv.load_dotenv()
+
+# Default file patterns
+DEFAULT_INCLUDE_PATTERNS = {
+    "*.py", "*.js", "*.ts", "*.go", "*.java", "*.pyi", "*.pyx",
+    "*.c", "*.cc", "*.cpp", "*.h", "*.md", "*.rst", "Dockerfile",
+    "Makefile", "*.yaml", "*.yml"
+}
+
+DEFAULT_EXCLUDE_PATTERNS = {
+    "*test*", "tests/*", "docs/*", "examples/*", "v1/*",
+    "dist/*", "build/*", "experimental/*", "deprecated/*",
+    "legacy/*", ".git/*", ".github/*"
+}
+
+# Set page config
+st.set_page_config(
+    page_title="Codebase Tutorial Generator", page_icon="📚", layout="wide"
+)
+
+# Title and description
+st.title("📚 Codebase Tutorial Generator")
+st.markdown("""
+This app generates comprehensive tutorials for GitHub codebases using AI.
+Simply provide a GitHub repository URL and customize the generation settings.
+""")
+
+# Sidebar for configuration
+with st.sidebar:
+    st.header("Configuration")
+
+    # GitHub token input
+    github_token = st.text_input(
+        "GitHub Token (optional)",
+        value=os.environ.get("GITHUB_TOKEN", ""),
+        type="password",
+        help="Personal access token for GitHub API. Helps avoid rate limits."
+    )
+
+    # Output directory
+    output_dir = st.text_input(
+        "Output Directory",
+        value="output",
+        help="Directory where the tutorial will be saved"
+    )
+
+    # Advanced options
+    with st.expander("Advanced Options"):
+        # File size limit
+        max_file_size = st.number_input(
+            "Max File Size (bytes)",
+            value=100000,
+            min_value=1000,
+            help="Maximum file size to process (in bytes)"
+        )
+
+        # Include patterns (sorted: set order varies between processes)
+        include_patterns_str = st.text_area(
+            "Include Patterns",
+            value="\n".join(sorted(DEFAULT_INCLUDE_PATTERNS)),
+            help="File patterns to include (one per line)"
+        )
+
+        # Exclude patterns
+        exclude_patterns_str = st.text_area(
+            "Exclude Patterns",
+            value="\n".join(sorted(DEFAULT_EXCLUDE_PATTERNS)),
+            help="File patterns to exclude (one per line)"
+        )
+
+# Main form
+with st.form("tutorial_form"):
+    # Repository URL
+    repo_url = st.text_input(
+        "GitHub Repository URL",
+        placeholder="https://github.com/username/repository",
+        help="URL of the public GitHub repository"
+    )
+
+    # Project name (optional)
+    project_name = st.text_input(
+        "Project Name (optional)",
+        help="Custom name for the project (derived from URL if omitted)"
+    )
+
+    submit_button = st.form_submit_button("Generate Tutorial")
+
+# Process form submission
+if submit_button:
+    if not repo_url:
+        st.error("Please enter a GitHub repository URL")
+    else:
+        # Show progress
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+
+        # Parse include/exclude patterns, ignoring blank lines and padding
+        include_patterns = {p.strip() for p in include_patterns_str.splitlines() if p.strip()}
+        exclude_patterns = {p.strip() for p in exclude_patterns_str.splitlines() if p.strip()}
+
+        # Initialize shared dictionary
+        shared = {
+            "repo_url": repo_url,
+            "project_name": project_name if project_name else None,
+            "github_token": github_token if github_token else os.environ.get("GITHUB_TOKEN"),
+            "output_dir": output_dir,
+            "include_patterns": include_patterns,
+            "exclude_patterns": exclude_patterns,
+            "max_file_size": max_file_size,
+            "files": [],
+            "abstractions": [],
+            "relationships": {},
+            "chapter_order": [],
+            "chapters": [],
+            "final_output_dir": None
+        }
+
+        try:
+            # Create and run the flow
+            status_text.text("Starting tutorial generation...")
+            progress_bar.progress(10)
+
+            tutorial_flow = create_tutorial_flow()
+
+            # The flow reports no per-node progress; this is a coarse marker
+            status_text.text("Fetching repository...")
+            progress_bar.progress(20)
+
+            # Nodes record their results in `shared` in place, so the outcome
+            # is read from `shared` rather than from run()'s return value.
+            tutorial_flow.run(shared)
+
+            progress_bar.progress(100)
+            status_text.text("Tutorial generation complete!")
+
+            # Display result
+            if shared.get("final_output_dir"):
+                st.success(f"Tutorial generated successfully in: {shared['final_output_dir']}")
+
+                # Provide download links if files are accessible
+                st.markdown("### Download Tutorial Files")
+                for name in sorted(os.listdir(shared["final_output_dir"])):
+                    file_path = os.path.join(shared["final_output_dir"], name)
+                    if os.path.isfile(file_path):
+                        with open(file_path, "rb") as fh:
+                            st.download_button(
+                                label=f"Download {name}",
+                                data=fh,
+                                file_name=name,
+                                mime="text/markdown"
+                            )
+            else:
+                st.warning("Tutorial generation completed but output directory not found.")
+
+        except Exception as e:
+            st.error(f"Error generating tutorial: {e}")
+            st.exception(e)
+
+# Display information about the app
+st.markdown("---")
+st.markdown("""
+### How it works
+1. The app clones the GitHub repository
+2. It analyzes the codebase structure and identifies key abstractions
+3. It determines relationships between components
+4. It generates tutorial chapters in a logical order
+5. Finally, it combines everything into a comprehensive tutorial
+
+### Requirements
+- A public GitHub repository
+- Google Gemini API access (configured via environment variables)
+""")
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..a7c117c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,15 @@
+version: '3'
+
+services:
+  tutorial-generator:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "8501:8501"
+    volumes:
+      - ./output:/app/output
+      - ./logs:/app/logs
+    env_file:
+      - .env
+    restart: unless-stopped
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 06253bc..c5f8912 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ gitpython>=3.1.0
 google-cloud-aiplatform>=1.25.0
 google-genai>=1.9.0
 python-dotenv>=1.0.0
+streamlit>=1.32.0