diff --git a/keywords.config.yaml b/keywords.config.yaml
index 55ad121a1..e1bf059bf 100644
--- a/keywords.config.yaml
+++ b/keywords.config.yaml
@@ -10,8 +10,10 @@ metadata_rules:
       - app-developer
       - node-operator
       - chain-operator
-      - protocol-developer
       - partner
+      - protocol-developer
+      - auditor
+      - governance-participant
     description: "Must select at least one valid persona"
 
   # CONTENT TYPE
@@ -176,6 +178,11 @@ metadata_rules:
       - testnet
       - mainnet
 
+      # Superchain Categories
+      - superchain-registry
+      - security-council
+      - blockspace-charters
+
   # TIMEFRAME
   timeframe:
     required_for:
diff --git a/notes/metadata-update.md b/notes/metadata-update.md
index fdf7f325c..bf75dd34e 100644
--- a/notes/metadata-update.md
+++ b/notes/metadata-update.md
@@ -13,29 +13,30 @@ Quick guide on using our metadata management system for the OP Stack documentati
 
 ## Using the Scripts
 
 1. Run a dry run to preview changes:
-   * Process all .mdx files in a directory
-```bash
-pnpm metadata-batch-cli:dry "pages/app-developers/**/*.mdx"
-```
-   * Process a specific file with verbose output
-```bash
-pnpm metadata-batch-cli:verbose "pages/app-developers/example.mdx"
-```
-   * Process multiple directories
-```bash
-pnpm metadata-batch-cli:dry "pages/app-developers/**/*.mdx" "pages/node-operators/**/*.mdx"
-```
+   ```bash
+   # Process all .mdx files in a directory
+   pnpm metadata-batch-cli:dry "pages/superchain/*.mdx"
+
+   # Process a specific file with verbose output
+   pnpm metadata-batch-cli:verbose "pages/app-developers/example.mdx"
+
+   # Process multiple directories
+   pnpm metadata-batch-cli:dry "pages/app-developers/*.mdx" "pages/node-operators/*.mdx"
+   ```
 
 2. Apply the changes (remove :dry):
-```bash
-pnpm metadata-batch-cli "pages/app-developers/**/*.mdx"
-```
+   ```bash
+   pnpm metadata-batch-cli "pages/app-developers/*.mdx"
+   ```
 
 ### Important Note About File Patterns
 
-* Use `**/*.mdx` to match all .mdx files in a directory and its subdirectories
-* The double asterisk `**` is required for recursive directory matching
-* Single `/` patterns will not work correctly
+Use these patterns to match files:
+
+* `directory/*.mdx` - matches all .mdx files in a specific directory
+* `directory/subdirectory/*.mdx` - matches all .mdx files in a specific subdirectory
+* `directory/*/*.mdx` - matches all .mdx files in all immediate subdirectories
+* The quotes around the pattern are important: they stop the shell from expanding the glob before the script sees it
 
 ### Configuration Files
 
@@ -100,13 +101,13 @@ metadata_rules:
    * Commit your current changes
    * Ensure you're in the docs root directory
    * Check that keywords.config.yaml exists and is properly configured
-   * **Important**: All valid metadata values must be defined in keywords.config.yaml
+   * **Important**: All metadata values must be defined in keywords.config.yaml
 
 2. **After Running**
-   * Review the manifest file
-   * Check validation messages in console output
-   * Verify metadata changes in files
-   * Review any files flagged for category review
+   * Review the categories assigned to each file
+   * Check that topics and personas are correct
+   * Verify any files marked for review
+   * Make sure network types (mainnet/testnet) are correct for registry files
 
 ## Content Analysis
 
diff --git a/package.json b/package.json
index bee69656d..27e1ab8cd 100644
--- a/package.json
+++ b/package.json
@@ -8,16 +8,16 @@
     "fix": "eslint . --ext mdx --fix && pnpm spellcheck:fix && pnpm breadcrumbs && pnpm fix-redirects && pnpm metadata-batch-cli",
     "spellcheck:lint": "cspell lint \"**/*.mdx\"",
     "spellcheck:fix": "cspell --words-only --unique \"**/*.mdx\" | sort --ignore-case | uniq > words.txt",
-    "breadcrumbs": "npx ts-node-esm --skip-project utils/create-breadcrumbs.ts",
-    "check-breadcrumbs": "node --loader ts-node/esm utils/breadcrumbs.ts",
-    "check-redirects": "node --loader ts-node/esm utils/redirects.ts",
-    "fix-redirects": "node --loader ts-node/esm utils/fix-redirects.ts",
-    "link-checker": "node --loader ts-node/esm utils/link-checker.ts",
-    "metadata-batch-cli": "node --loader ts-node/esm utils/metadata-batch-cli.ts",
+    "breadcrumbs": "NODE_NO_WARNINGS=1 npx ts-node-esm --skip-project utils/create-breadcrumbs.ts",
+    "check-breadcrumbs": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/breadcrumbs.ts",
+    "check-redirects": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/redirects.ts",
+    "fix-redirects": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/fix-redirects.ts",
+    "link-checker": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/link-checker.ts",
+    "metadata-batch-cli": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/metadata-batch-cli.ts",
     "metadata-batch-cli:dry": "pnpm metadata-batch-cli --dry-run",
     "metadata-batch-cli:verbose": "pnpm metadata-batch-cli --verbose",
-    "validate-metadata": "CHANGED_FILES=$(git diff --name-only HEAD) node --loader ts-node/esm utils/metadata-manager.ts",
-    "validate-pr-metadata": "node --loader ts-node/esm utils/metadata-manager.ts --pr",
+    "validate-metadata": "CHANGED_FILES=$(git diff --name-only HEAD) NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/metadata-manager.ts",
+    "validate-pr-metadata": "NODE_NO_WARNINGS=1 node --loader ts-node/esm utils/metadata-manager.ts --pr",
     "dev": "next dev",
     "build": "next build",
     "start": "next start",
diff --git a/pages/superchain/addresses.mdx b/pages/superchain/addresses.mdx
index afc30c0b4..3402f0cbf 100644
--- a/pages/superchain/addresses.mdx
+++ b/pages/superchain/addresses.mdx
@@ -1,7 +1,17 @@
 ---
-title: Contract addresses
-lang: en-US
+title: Contract Addresses
 description: This reference guide lists all the contract addresses for Mainnet and Testnet.
+lang: en-US
+content_type: guide
+topic: contract-addresses
+personas:
+  - chain-operator
+  - protocol-developer
+categories:
+  - superchain-registry
+  - mainnet
+  - testnet
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/blockspace-charter.mdx b/pages/superchain/blockspace-charter.mdx
index 207c5063f..cd8aae8b3 100644
--- a/pages/superchain/blockspace-charter.mdx
+++ b/pages/superchain/blockspace-charter.mdx
@@ -1,7 +1,19 @@
 ---
-title: Blockspace and Standard Rollup Charters
+title: The Blockspace and Standard Rollup charters
+description: >-
+  Learn about Blockspace Charters, the Standard Rollup Charter, and the
+  Superchain Registry.
 lang: en-US
-description: Learn about Blockspace Charters, the Standard Rollup Charter, and the Superchain Registry.
+content_type: guide
+topic: the-blockspace-and-standard-rollup-charters
+personas:
+  - chain-operator
+  - protocol-developer
+  - governance-participant
+categories:
+  - blockspace-charters
+  - mainnet
+is_imported_content: 'false'
 ---
 
 # The Blockspace and Standard Rollup charters
diff --git a/pages/superchain/networks.mdx b/pages/superchain/networks.mdx
index b3930cc04..318855f27 100644
--- a/pages/superchain/networks.mdx
+++ b/pages/superchain/networks.mdx
@@ -1,7 +1,18 @@
 ---
 title: OP Stack networks and public RPC endpoints
-lang: en-US
 description: Learn about the different OP Stack networks and public RPC endpoints.
+lang: en-US
+content_type: guide
+topic: op-stack-networks-and-public-rpc-endpoints
+personas:
+  - chain-operator
+  - app-developer
+  - protocol-developer
+categories:
+  - superchain-registry
+  - mainnet
+  - testnet
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/privileged-roles.mdx b/pages/superchain/privileged-roles.mdx
index a5e808551..a5c1a120d 100644
--- a/pages/superchain/privileged-roles.mdx
+++ b/pages/superchain/privileged-roles.mdx
@@ -1,7 +1,18 @@
 ---
 title: Privileged Roles in OP Stack Chains
-lang: en-US
 description: Learn about the privileged roles in OP Stack chains.
+lang: en-US
+content_type: guide
+topic: privileged-roles-in-op-stack-chains
+personas:
+  - chain-operator
+  - protocol-developer
+  - auditor
+categories:
+  - protocol
+  - security
+  - testnet
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/standard-configuration.mdx b/pages/superchain/standard-configuration.mdx
index 131e2c91b..cef55b056 100644
--- a/pages/superchain/standard-configuration.mdx
+++ b/pages/superchain/standard-configuration.mdx
@@ -1,7 +1,19 @@
 ---
 title: What makes a chain standard?
+description: >-
+  Learn what makes a chain standard, how op-deployer helps with standardization,
+  and why being standard matters.
 lang: en-US
-description: Learn what makes a chain standard, how op-deployer helps with standardization, and why being standard matters.
+content_type: guide
+topic: what-makes-a-chain-standard
+personas:
+  - chain-operator
+  - protocol-developer
+  - auditor
+categories:
+  - protocol
+  - security
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/superchain-explainer.mdx b/pages/superchain/superchain-explainer.mdx
index b62e4b1f7..53013bea9 100644
--- a/pages/superchain/superchain-explainer.mdx
+++ b/pages/superchain/superchain-explainer.mdx
@@ -1,7 +1,21 @@
 ---
 title: Superchain explainer
+description: 'Learn about Optimism Superchain components, features, and roadmap.'
 lang: en-US
-description: Learn about Optimism Superchain components, features, and roadmap.
+content_type: guide
+topic: superchain-explainer
+personas:
+  - app-developer
+  - chain-operator
+  - node-operator
+  - protocol-developer
+  - auditor
+categories:
+  - mainnet
+  - protocol
+  - security
+  - architecture
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/superchain-registry.mdx b/pages/superchain/superchain-registry.mdx
index faa37dc11..880b94f15 100644
--- a/pages/superchain/superchain-registry.mdx
+++ b/pages/superchain/superchain-registry.mdx
@@ -1,7 +1,19 @@
 ---
 title: The Superchain Registry
-lang: en-US
 description: Learn about the members of the Optimism Superchain ecosystem.
+lang: en-US
+content_type: guide
+topic: the-superchain-registry
+personas:
+  - chain-operator
+  - protocol-developer
+  - auditor
+categories:
+  - superchain-registry
+  - protocol
+  - security
+  - configuration
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/pages/superchain/tokenlist.mdx b/pages/superchain/tokenlist.mdx
index 73f4f4c9b..5c7c267c2 100644
--- a/pages/superchain/tokenlist.mdx
+++ b/pages/superchain/tokenlist.mdx
@@ -1,7 +1,18 @@
 ---
 title: Bridged token addresses
-lang: en-US
 description: This reference guide lists the correct bridged token addresses for each token.
+lang: en-US
+content_type: guide
+topic: bridged-token-addresses
+personas:
+  - chain-operator
+  - app-developer
+  - protocol-developer
+categories:
+  - superchain-registry
+  - mainnet
+  - testnet
+is_imported_content: 'false'
 ---
 
 import { Callout } from 'nextra/components'
diff --git a/utils/metadata-analyzer.ts b/utils/metadata-analyzer.ts
index eb459e048..192f1e5b8 100644
--- a/utils/metadata-analyzer.ts
+++ b/utils/metadata-analyzer.ts
@@ -1,7 +1,20 @@
 import fs from 'fs'
 import path from 'path'
+import yaml from 'js-yaml'
 import { MetadataResult, VALID_CATEGORIES, VALID_CONTENT_TYPES } from './types/metadata-types'
 
+// Load YAML config
+const yamlConfig = yaml.load(fs.readFileSync('keywords.config.yaml', 'utf8')) as {
+  metadata_rules: {
+    categories: {
+      file_patterns: {
+        superchain_registry: string[];
+        security_council: string[];
+      };
+    };
+  };
+};
+
 // Add interfaces for configuration
 interface AnalyzerConfig {
   defaultLang: string;
@@ -64,9 +77,52 @@ class MetadataAnalysisError extends Error {
 }
 /**
- * Returns default personas for app developer content
+ * Detects title from content by finding first h1 heading
+ */
+function detectTitle(content: string, filepath: string): string {
+  // Try to find first h1 heading
+  const h1Match = content.match(/^#\s+(.+)$/m);
+  if (h1Match) {
+    return h1Match[1].trim();
+  }
+
+  // Fallback to filename without extension
+  const filename = path.basename(filepath, '.mdx');
+  return filename
+    .split('-')
+    .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+    .join(' ');
+}
+
+/**
+ * Returns default personas based on content location
  */
 export function getDefaultPersonas(filepath: string): string[] {
+  // Superchain content
+  if (filepath.includes('/superchain/')) {
+    const filename = path.basename(filepath);
+
+    // Registry files are for chain operators
+    const registryFiles = ['networks.mdx', 'addresses.mdx', 'registry.mdx', 'tokenlist.mdx', 'superchain-registry.mdx'];
+    if (registryFiles.includes(filename)) {
+      return ['chain-operator'];
+    }
+
+    // Security files are for chain operators
+    const securityFiles = ['privileged-roles.mdx', 'standard-configuration.mdx'];
+    if (securityFiles.includes(filename)) {
+      return ['chain-operator'];
+    }
+
+    // Blockspace charter is for chain operators
+    if (filename.includes('blockspace-charter')) {
+      return ['chain-operator'];
+    }
+
+    // Default superchain content is for all personas
+    return ['app-developer', 'chain-operator', 'node-operator'];
+  }
+
   // Chain operator content
   if (filepath.includes('/chain-operators/')) {
     return ['chain-operator'];
   }
@@ -143,99 +199,91 @@ function getLandingPageCategories(filepath: string, content: string): Set<string
+function addValidCategory(categories: Set<string>, category: string): void {
+  if (isValidCategory(category)) {
+    categories.add(category);
+  }
+}
+
 // Helper functions for category detection
 function detectStackCategories(filepath: string, content: string): Set<string> {
   const categories = new Set<string>();
 
-  // Base protocol content
-  if (filepath.match(/\/(rollup|transactions|components|differences|smart-contracts)\//)) {
-    categories.add('protocol');
-  }
-
-  // Research and specs
-  if (filepath.includes('/research/')) {
-    categories.add('protocol');
-    if (content.toLowerCase().includes('block time')) {
-      categories.add('block-times');
-    }
-  }
-
-  // Root stack pages
-  if (filepath.match(/\/stack\/[^/]+\.mdx$/)) {
-    if (filepath.endsWith('features.mdx') || filepath.endsWith('beta-features.mdx')) {
-      categories.add('protocol');
-      if (content.toLowerCase().includes('gas')) {
-        categories.add('custom-gas-token');
-      }
-      if (content.toLowerCase().includes('data availability')) {
-        categories.add('alt-da');
-      }
-    }
-
-    // Core protocol pages
-    if (['rollup.mdx', 'transactions.mdx', 'components.mdx', 'differences.mdx', 'smart-contracts.mdx']
-      .some(file => filepath.endsWith(file))) {
-      categories.add('protocol');
+  if (filepath.includes('/superchain/')) {
+    const filename = path.basename(filepath);
+
+    // Registry files
+    const registryFiles = ['networks.mdx', 'addresses.mdx', 'registry.mdx', 'tokenlist.mdx', 'superchain-registry.mdx'];
+    if (registryFiles.includes(filename)) {
+      addValidCategory(categories, 'superchain-registry');
     }
 
-    // Development pages
-    if (['dev-node.mdx', 'getting-started.mdx', 'public-devnets.mdx']
-      .some(file => filepath.endsWith(file))) {
-      categories.add('devnets');
+    // Protocol and security files
+    const securityFiles = ['privileged-roles.mdx', 'standard-configuration.mdx'];
+    if (securityFiles.includes(filename)) {
+      addValidCategory(categories, 'protocol');
+      addValidCategory(categories, 'security');
     }
 
-    // Security pages
-    if (filepath.endsWith('security.mdx')) {
-      categories.add('security');
+    // Blockspace charter
+    if (filename.includes('blockspace-charter')) {
+      addValidCategory(categories, 'blockspace-charters');
     }
 
-    // Research pages
-    if (['research.mdx', 'fact-sheet.mdx', 'design-principles.mdx']
-      .some(file => filepath.endsWith(file))) {
-      categories.add('protocol');
+    // Network types based on content
+    const contentLower = content.toLowerCase();
+    if (contentLower.includes('mainnet')) {
+      addValidCategory(categories, 'mainnet');
     }
-
-    // Interop pages
-    if (filepath.endsWith('interop.mdx')) {
-      categories.add('interop');
-      categories.add('cross-chain-messaging');
+    if (contentLower.includes('testnet') || contentLower.includes('sepolia')) {
+      addValidCategory(categories, 'testnet');
     }
   }
@@ -247,46 +295,46 @@ function detectOperatorCategories(filepath: string, content: string): Set<string
   // Common infrastructure
   if (content.toLowerCase().includes('kubernetes') ||
       content.toLowerCase().includes('k8s')) {
-    categories.add('kubernetes-infrastructure');
+    addValidCategory(categories, 'kubernetes-infrastructure');
   }
 
-  // Superchain content
-  if (filepath.includes('/superchain/')) {
-    if (filepath.includes('blockspace')) {
-      categories.add('blockspace-charters');
-    }
-    if (filepath.includes('registry') ||
-        filepath.includes('addresses') ||
-        filepath.includes('networks')) {
-      categories.add('superchain-registry');
-    }
-    if (content.toLowerCase().includes('security') ||
-        content.toLowerCase().includes('privileged')) {
-      categories.add('security-council');
-    }
+  // Network types
+  if (content.toLowerCase().includes('mainnet')) {
+    addValidCategory(categories, 'mainnet');
+  }
+  if (content.toLowerCase().includes('testnet') ||
+      content.toLowerCase().includes('sepolia')) {
+    addValidCategory(categories, 'testnet');
   }
 
   return categories;
@@ -370,18 +411,18 @@ function detectCategories(
 ): string[] {
   const categories = new Set<string>();
 
+  // Stack categories
+  if (filepath.includes('/stack/') || filepath.includes('/superchain/')) {
+    const stackCategories = detectStackCategories(filepath, content);
+    stackCategories.forEach(category => categories.add(category));
+  }
+
   // Landing page categories
   if (isLandingPage(content, filepath, new Set())) {
     const landingCategories = getLandingPageCategories(filepath, content);
     landingCategories.forEach(category => categories.add(category));
   }
 
-  // Stack categories
-  if (filepath.includes('/stack/')) {
-    const stackCategories = detectStackCategories(filepath, content);
-    stackCategories.forEach(category => categories.add(category));
-  }
-
   // Operator categories
   const operatorCategories = detectOperatorCategories(filepath, content);
   operatorCategories.forEach(category => categories.add(category));
@@ -502,54 +543,42 @@ export function analyzeContent(
   verbose: boolean = false,
   config: AnalyzerConfig = DEFAULT_CONFIG
 ): MetadataResult {
-  // Validate inputs
-  if (!filepath || typeof filepath !== 'string') {
-    throw new MetadataAnalysisError('Invalid file path provided');
-  }
-  if (!content || typeof content !== 'string') {
-    throw new MetadataAnalysisError('Invalid content provided');
-  }
-  if (typeof verbose !== 'boolean') {
-    throw new MetadataAnalysisError('Invalid verbose flag provided');
-  }
-
-  const detectionLog: string[] = [];
-  const warnings: string[] = [];
-  const detectedPages = new Set<string>();
-
   try {
+    // Initialize detection tracking
+    const detectionLog: string[] = [];
+    const detectedPages = new Set<string>();
+
+    // Get title first since we need it for topic
+    const title = detectTitle(content, filepath);
+    const topic = generateTopic(title);
+    const personas = getDefaultPersonas(filepath);
+
     const contentType = detectContentType(content, detectionLog, filepath, detectedPages);
     const categories = detectCategories(content, filepath, detectionLog, config);
 
-    // Only track warnings if verbose mode is on
-    if (contentType === 'NEEDS_REVIEW') {
-      warnings.push('Content type needs manual review');
-    }
-    if (categories.length === 0) {
-      warnings.push('Categories may need manual review');
-    }
-
-    // Only log if verbose mode is on
-    if (verbose) {
-      config.logger(`\n📄 ${filepath}`);
-      config.logger(`  Type: ${contentType}`);
-      config.logger(`  Categories: ${categories.length ? categories.join(', ') : 'none'}`);
-      warnings.forEach(warning => {
-        config.logger(`  ⚠️ ${warning}`);
-      });
-    }
-
-    return {
+    // Create result with all required fields
+    const result: MetadataResult = {
       content_type: contentType as typeof VALID_CONTENT_TYPES[number],
       categories,
-      detectionLog,
-      title: config.defaultTitle,
+      title,
+      topic,
+      personas,
       lang: config.defaultLang,
       description: config.defaultDescription,
-      topic: generateTopic(config.defaultTitle),
-      personas: getDefaultPersonas(filepath),
+      detectionLog,
       is_imported_content: 'false'
     };
+
+    // Log if verbose
+    if (verbose) {
+      config.logger(`\n📄 ${filepath}`);
+      config.logger(`  Type: ${result.content_type}`);
+      config.logger(`  Title: ${result.title}`);
+      config.logger(`  Topic: ${result.topic}`);
+      config.logger(`  Categories: ${result.categories.join(', ')}`);
+      config.logger(`  Personas: ${result.personas.join(', ')}`);
+    }
+
+    return result;
   } catch (error) {
     throw new MetadataAnalysisError(`Failed to analyze ${filepath}: ${error.message}`);
   }
diff --git a/utils/metadata-batch-cli.ts b/utils/metadata-batch-cli.ts
index a2112f81f..81c31a452 100644
--- a/utils/metadata-batch-cli.ts
+++ b/utils/metadata-batch-cli.ts
@@ -3,8 +3,10 @@ import { promises as fs } from 'fs'
 import path from 'path'
 import { fileURLToPath } from 'url'
 
-import { updateMetadata } from './metadata-manager'
+import { updateMetadata as updateMetadataFile } from './metadata-manager'
 import matter from 'gray-matter'
+import { analyzeContent } from './metadata-analyzer'
+import { MetadataResult } from './types/metadata-types'
 
 // @ts-ignore
 const globModule = await import('glob')
@@ -35,6 +37,11 @@ interface ParentMetadata {
   categories: string[]
 }
 
+interface CliOptions {
+  dryRun: boolean
+  verbose: boolean
+}
+
 async function findMdxFiles(pattern: string): Promise<string[]> {
   const files = await globModule.glob(pattern, { ignore: ['pages/_*.mdx'] })
   return files
@@ -83,39 +90,37 @@ async function findParentMetadata(filePath: string): Promise<ParentMetadata | nu
-async function processFile(filePath: string, dryRun: boolean, verbose: boolean): Promise<{ categories: string[]; contentType: string; isImported: boolean }> {
-  if (verbose) {
-    console.log(`\nProcessing: ${filePath}`)
+async function validateMetadata(
+  filepath: string,
+  options: {
+    dryRun?: boolean;
+    verbose?: boolean;
+    analysis: MetadataResult;
+    validateOnly: boolean;
+    prMode: boolean;
   }
-
-  const result = await updateMetadata(filePath, {
-    dryRun,
-    prMode: !verbose
-  })
-
-  if (!result.isValid) {
-    throw new Error(`Failed to process ${filePath}: ${result.errors.join(', ')}`)
+): Promise<{ isValid: boolean; errors: string[]; metadata: MetadataResult }> {
+  const errors: string[] = [];
+
+  // Validate required fields using proper types
+  if (!options.analysis?.topic || typeof options.analysis.topic !== 'string') {
+    errors.push('Missing required field: topic');
   }
-
-  if (verbose) {
-    console.log('New metadata:', result.metadata)
-    if (result.errors.length > 0) {
-      console.log('Validation warnings:', result.errors)
-    }
-    if (dryRun) {
-      console.log('Dry run - no changes made')
-    }
+  if (!Array.isArray(options.analysis?.personas) || options.analysis.personas.length === 0) {
+    errors.push('Missing required field: personas');
+  }
+  if (!Array.isArray(options.analysis?.categories)) {
+    errors.push('Missing required field: categories');
+  }
+  if (!options.analysis?.content_type) {
+    errors.push('Missing required field: content_type');
   }
 
   return {
-    categories: result.metadata.categories,
-    contentType: result.metadata.content_type,
-    isImported: result.metadata.is_imported_content === 'true'
-  }
+    isValid: errors.length === 0,
+    errors,
+    metadata: options.analysis
+  };
 }
 
 async function validateFilePaths(files: string[]): Promise<string[]> {
@@ -124,9 +129,7 @@ async function validateFilePaths(files: string[]): Promise<string[]> {
 
   for (const file of files) {
     try {
-      // Check if file exists and is readable
       await fs.access(file, fs.constants.R_OK)
-      // Check if it's actually a file (not a directory)
       const stats = await fs.stat(file)
       if (stats.isFile()) {
         validFiles.push(file)
@@ -146,55 +149,108 @@ async function validateFilePaths(files: string[]): Promise<string[]> {
   return validFiles
 }
 
-async function processFiles(files: string[]): Promise<boolean> {
-  let hasErrors = false
-  let processedCount = 0
+function truncateString(str: string, maxLength: number = 80): string {
+  return str.length > maxLength ? str.slice(0, maxLength - 3) + '...' : str
+}
+
+async function processFiles(files: string[], options: CliOptions): Promise<{
+  hasErrors: boolean;
+  stats: {
+    total: number;
+    successful: number;
+    needsReview: number;
+    failed: number;
+  };
+}> {
+  const stats = {
+    total: files.length,
+    successful: 0,
+    needsReview: 0,
+    failed: 0
+  }
 
   for (const file of files) {
     try {
-      const result = await updateMetadata(file, { dryRun: true, prMode: true })
+      const content = await fs.readFile(file, 'utf8')
+      const { data: frontmatter } = matter(content)
+      const analysis = analyzeContent(content, file, options.verbose)
+      const result = await updateMetadataFile(file, {
+        dryRun: true,
+        verbose: false,
+        analysis,
+        validateOnly: true,
+        prMode: true
+      })
+
+      console.log(`\n${colors.blue}📄 ${file}${colors.reset}`)
+      console.log(`  Title: ${analysis.title || frontmatter.title || ''}`)
+      console.log(`  Description: ${truncateString(frontmatter.description || '')}`)
+      console.log(`  Lang: ${frontmatter.lang || analysis.lang || 'en-US'}`)
+      console.log(`  Content Type: ${analysis.content_type}`)
+      console.log(`  Topic: ${analysis.topic}`)
+      console.log(`  Personas: ${analysis.personas.join(', ')}`)
+      console.log(`  Categories: ${analysis.categories?.length ? analysis.categories.join(', ') : 'none'}`)
+
       if (!result.isValid) {
-        hasErrors = true
-        console.log(`\n${colors.red}Error in ${file}:${colors.reset}`)
+        console.log('  ⚠️ Review needed:')
         result.errors.forEach(error => {
-          console.log(`  ${colors.yellow}→${colors.reset} ${error}`)
+          console.log(`    → ${error}`)
         })
+        stats.needsReview++
+      } else {
+        if (!options.dryRun) {
+          await updateMetadataFile(file, {
+            dryRun: false,
+            verbose: options.verbose || false,
+            analysis,
+            validateOnly: false,
+            prMode: false
+          })
+          console.log('  ✓ Updates applied')
+        }
+        stats.successful++
       }
-      processedCount++
     } catch (e) {
-      console.log(`\n${colors.red}Failed to process ${file}: ${e}${colors.reset}`)
-      hasErrors = true
+      stats.failed++
+      console.log(`${colors.yellow}⚠️ Error processing ${file}:${colors.reset} ${e}`)
     }
   }
 
-  console.log(
-    hasErrors
-      ? `\n${colors.red}✖ Found metadata issues in some files${colors.reset}`
-      : `\n${colors.green}✓ Validated ${processedCount} files successfully${colors.reset}`
-  )
+  // Print summary
+  console.log('\nSummary:')
+  console.log(`${colors.green}✓ ${stats.successful} files processed${colors.reset}`)
+  if (stats.needsReview > 0) {
+    console.log(`${colors.yellow}⚠️ ${stats.needsReview} files need review${colors.reset}`)
+  }
+  if (stats.failed > 0) {
+    console.log(`${colors.yellow}⚠️ ${stats.failed} files need manual updates${colors.reset}`)
+  }
 
-  return hasErrors
+  return { hasErrors: stats.failed > 0, stats }
 }
 
 async function main() {
   try {
     console.log('Checking metadata...')
 
-    // Get modified files from git and validate input
-    const gitOutput = process.env.CHANGED_FILES || ''
-    if (!gitOutput.trim()) {
-      console.log(`${colors.green}✓ No files to check${colors.reset}`)
-      process.exit(0)
+    let modifiedFiles: string[] = []
+
+    // Check if we have a direct glob pattern argument
+    const globPattern = process.argv.find(arg => arg.includes('*.mdx'))
+    if (globPattern) {
+      modifiedFiles = await globModule.glob(globPattern)
+    } else {
+      // Fall back to CHANGED_FILES if no glob pattern
+      const gitOutput = process.env.CHANGED_FILES || ''
+      modifiedFiles = gitOutput
+        .split('\n')
+        .filter(file => file.trim())
+        .filter(file => file.endsWith('.mdx'))
+        .map(file => path.resolve(process.cwd(), file))
     }
 
-    const modifiedFiles = gitOutput
-      .split('\n')
-      .filter(file => file.trim()) // Remove empty lines
-      .filter(file => file.endsWith('.mdx'))
-      .map(file => path.resolve(process.cwd(), file))
-
     if (modifiedFiles.length === 0) {
-      console.log(`${colors.green}✓ No MDX files modified${colors.reset}`)
+      console.log(`${colors.green}✓ No MDX files to check${colors.reset}`)
       process.exit(0)
     }
@@ -202,17 +258,23 @@ async function main() {
     const validFiles = await validateFilePaths(modifiedFiles)
 
     if (validFiles.length === 0) {
-      console.log(`${colors.red}✖ No valid files to check${colors.reset}`)
-      process.exit(1)
+      console.log(`${colors.yellow}⚠️ No valid files to check${colors.reset}`)
+      process.exit(0)
     }
 
     console.log(`Found ${validFiles.length} valid files to check`)
 
-    const hasErrors = await processFiles(validFiles)
-    process.exit(hasErrors ? 1 : 0)
+    const options: CliOptions = {
+      dryRun: process.argv.includes('--dry-run'),
+      verbose: process.argv.includes('--verbose')
+    }
+
+    const { hasErrors, stats } = await processFiles(validFiles, options)
+    // Don't exit with error code - we want this to be non-blocking
+    process.exit(0)
   } catch (error) {
-    console.error(`${colors.red}Error: ${error}${colors.reset}`)
-    process.exit(1)
+    console.error(`${colors.yellow}⚠️ Error: ${error}${colors.reset}`)
+    process.exit(0)
   }
 }
diff --git a/utils/metadata-manager.ts b/utils/metadata-manager.ts
index 52a4b7015..699692e86 100644
--- a/utils/metadata-manager.ts
+++ b/utils/metadata-manager.ts
@@ -3,7 +3,7 @@ import path from 'path'
 import matter from 'gray-matter'
 import yaml from 'js-yaml'
 import { analyzeContent } from './metadata-analyzer'
-import type { MetadataResult } from './types/metadata-types'
+import { MetadataResult, MetadataOptions, ValidationResult } from './types/metadata-types'
 
 // Add the interfaces at the top of the file
 interface ValidationOptions {
@@ -12,13 +12,25 @@ interface ValidationOptions {
   prMode?: boolean
   validateOnly?: boolean
 }
 
-interface ValidationResult {
-  isValid: boolean
-  errors: string[]
+interface UpdateOptions {
+  dryRun?: boolean
+  validateOnly?: boolean
+  prMode?: boolean
+  verbose?: boolean
+  analysis?: MetadataResult
 }
 
 // Validation functions
-async function validateMetadata(metadata: MetadataResult, filepath: string, options: ValidationOptions = {}): Promise<ValidationResult> {
+async function validateMetadata(
+  metadata: MetadataResult,
+  filepath: string,
+  options: {
+    dryRun?: boolean;
+    verbose?: boolean;
+    validateOnly?: boolean;
+    prMode?: boolean;
+  } = {}
+): Promise<ValidationResult> {
   const errors = [] as string[]
   const config = await loadConfig('keywords.config.yaml')
@@ -106,61 +118,63 @@ export async function generateMetadata(filePath: string): Promise<MetadataResult
-export async function updateMetadata(filePath: string, options: ValidationOptions = {}): Promise<ValidationResult> {
+export async function updateMetadata(
+  filepath: string,
+  options: MetadataOptions
+): Promise<ValidationResult> {
   try {
-    const content = await fs.readFile(filePath, 'utf8')
-    const { data: currentMetadata } = matter(content)
+    const content = await fs.readFile(filepath, 'utf8')
+    const { data: frontmatter, content: docContent } = matter(content)
 
-    if (options.validateOnly) {
-      const validationResult = await validateMetadata(currentMetadata as MetadataResult, filePath)
-      return {
-        isValid: validationResult.isValid,
-        errors: validationResult.errors
-      }
+    // Guard against undefined analysis with optional chaining
+    const safeAnalysis = options?.analysis || {} as MetadataResult
+
+    // Create new metadata object with all fields
+    const newMetadata = {
+      title: safeAnalysis.title || frontmatter.title || '',
+      description: frontmatter.description || safeAnalysis.description || '',
+      lang: frontmatter.lang || safeAnalysis.lang || 'en-US',
+      content_type: safeAnalysis.content_type,
+      topic: safeAnalysis.topic || '',
+      personas: safeAnalysis.personas || [],
+      categories: safeAnalysis.categories || [],
+      is_imported_content: safeAnalysis.is_imported_content || 'false'
     }
 
-    const newMetadata = await generateMetadata(filePath)
-    const validationResult = await validateMetadata(newMetadata, filePath)
+    // Validate metadata in all cases
+    const validationResult = await validateMetadata(newMetadata, filepath, options)
 
-    if (!options.dryRun && validationResult.isValid) {
-      const updatedContent = matter.stringify(content, newMetadata)
-      const tempPath = `${filePath}.tmp`
-
-      try {
-        await fs.writeFile(tempPath, updatedContent)
-        await fs.rename(tempPath, filePath)
-      } catch (writeError) {
-        try {
-          await fs.unlink(tempPath)
-        } catch (cleanupError) {
-          // Ignore cleanup errors
+    // Check validation mode
+    if (options.validateOnly || options.prMode) {
+      return {
+        isValid: validationResult.isValid,
+        errors: validationResult.errors,
+        suggestions: {
+          categories: safeAnalysis.categories,
+          content_type: safeAnalysis.content_type
         }
-        throw writeError
       }
     }
 
-    if (validationResult.errors.length > 0 && !options.prMode) {
-      console.log(`\nMetadata validation errors in ${filePath}:`)
-      validationResult.errors.forEach(error => console.log(`- ${error}`))
+    // Only write if not in dry run mode and validation passed
+    if (!options.dryRun && validationResult.isValid) {
+      const updatedContent = matter.stringify(docContent, newMetadata)
+      await fs.writeFile(filepath, updatedContent, 'utf8')
     }
 
     return {
       isValid: validationResult.isValid,
       errors: validationResult.errors,
-      metadata: newMetadata
+      suggestions: {
+        categories: safeAnalysis.categories,
+        content_type: safeAnalysis.content_type
+      }
     }
   } catch (error) {
     return {
       isValid: false,
-      errors: [`Error processing file: ${error.message}`]
+      errors: [`Failed to update metadata for ${filepath}: ${error.message}`],
+      suggestions: {}
     }
   }
 }
@@ -180,7 +194,15 @@ export async function validatePRChanges(): Promise<boolean> {
   let hasErrors = false
 
   for (const file of modifiedFiles) {
-    const result = await updateMetadata(file, { validateOnly: true, prMode: true })
+    const content = await fs.readFile(file, 'utf8')
+    const analysis = analyzeContent(content, file)
+    const result = await updateMetadata(file, {
+      validateOnly: true,
+      prMode: true,
+      dryRun: true,
+      verbose: false,
+      analysis
+    })
     if (!result.isValid) {
       hasErrors = true
       console.error(`\n${file}:`)
@@ -211,11 +233,31 @@ if (import.meta.url === `file://${process.argv[1]}`) {
   const validateFiles = async () => {
     let hasErrors = false
     for (const file of modifiedFiles) {
-      const result = await updateMetadata(file, { validateOnly: true, prMode: true })
+      const content = await fs.readFile(file, 'utf8')
+      const analysis = analyzeContent(content, file)
+      const result = await updateMetadata(file, {
+        validateOnly: true,
+        prMode: true,
+        dryRun: true,
+        verbose: false,
+        analysis
+      })
       if (!result.isValid) {
         console.log('\x1b[33m⚠️ Metadata validation warnings:\x1b[0m')
         console.log(`\nFile: ${file}`)
         result.errors.forEach(error => console.log(`  → ${error}`))
+
+        // Show suggestions if available
+        if (result.suggestions?.categories?.length || result.suggestions?.content_type) {
+          console.log('\nSuggested metadata:')
+          if (result.suggestions.content_type) {
+            console.log(`  content_type: ${result.suggestions.content_type}`)
+          }
+          if (result.suggestions.categories?.length) {
+            console.log(`  categories: ${result.suggestions.categories.join(', ')}`)
+          }
+        }
+
         console.log('\nTo fix these warnings:')
         console.log('1. Add required metadata to your MDX file\'s frontmatter:')
         console.log('```yaml')
diff --git a/utils/types/metadata-types.ts b/utils/types/metadata-types.ts
index 543c114f2..a6702c994 100644
--- a/utils/types/metadata-types.ts
+++ b/utils/types/metadata-types.ts
@@ -23,7 +23,37 @@ if (!isValidConfig(yamlContent)) {
 
 export const VALID_PERSONAS = yamlContent.metadata_rules.persona.validation_rules[0].enum as readonly string[]
 export const VALID_CONTENT_TYPES = yamlContent.metadata_rules.content_type.validation_rules[0].enum as readonly string[]
-export const VALID_CATEGORIES = yamlContent.metadata_rules.categories.values as readonly string[]
+export const VALID_CATEGORIES = [
+  'protocol',
+  'infrastructure',
+  'sequencer',
+  'op-batcher',
+  'rollup-node',
+  'op-geth',
+  'fault-proofs',
+  'op-challenger',
+  'cannon',
+  'l1-deployment-upgrade-tooling',
+  'l2-deployment-upgrade-tooling',
+  'monitorism',
+  'security',
+  'automated-pause',
+  'kubernetes-infrastructure',
+  'cross-chain-messaging',
+  'standard-bridge',
+  'interoperable-message-passing',
+  'hardhat',
+  'foundry',
+  'ethers',
+  'viem',
+  'supersim',
+  'devnets',
+  'mainnet',
+  'testnet',
+  'superchain-registry',
+  'security-council',
+  'blockspace-charters'
+] as const;
 
 export interface ValidationRule {
   pattern?: string
@@ -79,12 +109,12 @@ export interface MetadataResult {
   lang: string
   description: string
   topic: string
-  personas: Array<typeof VALID_PERSONAS[number]> // Explicitly typed array
+  personas: Array<typeof VALID_PERSONAS[number]>
   content_type: typeof VALID_CONTENT_TYPES[number]
-  categories: Array<typeof VALID_CATEGORIES[number]> // Explicitly typed array
+  categories: Array<typeof VALID_CATEGORIES[number]>
   is_imported_content: string
   content?: string
-  detectionLog?: Array<string> // Explicitly typed array
+  detectionLog?: Array<string>
 }
 
 export interface ProcessedFile {
@@ -95,5 +125,22 @@ export interface ProcessedFile {
 
 export interface Manifest {
   timestamp: string
-  processed_files: Array<ProcessedFile> // Explicitly typed array
+  processed_files: Array<ProcessedFile>
+}
+
+export interface MetadataOptions {
+  dryRun: boolean;
+  verbose: boolean;
+  analysis: MetadataResult;
+  validateOnly: boolean;
+  prMode: boolean;
+}
+
+export interface ValidationResult {
+  isValid: boolean;
+  errors: string[];
+  suggestions?: {
+    categories?: string[];
+    content_type?: string;
+  };
 }
\ No newline at end of file
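
For reviewers who want to exercise the new flow by hand, here is a minimal sketch of the call sequence that `validatePRChanges` and the batch CLI now share: read a file, derive metadata with `analyzeContent`, then pass that analysis to `updateMetadata` in validate-only mode. The `checkOne` wrapper and the hard-coded path are hypothetical; the imports, option fields, and result shape come from the diff above.

```typescript
// Hypothetical driver, not part of this PR: it mirrors the
// analyze-then-validate sequence used by validatePRChanges() and processFiles().
import { promises as fs } from 'fs'
import { analyzeContent } from './metadata-analyzer'
import { updateMetadata } from './metadata-manager'

async function checkOne(filepath: string): Promise<void> {
  const content = await fs.readFile(filepath, 'utf8')

  // Derive title, topic, personas, and categories from the file itself.
  const analysis = analyzeContent(content, filepath, false)

  // validateOnly + prMode report problems and suggestions without writing;
  // dryRun: true additionally guards against accidental writes.
  const result = await updateMetadata(filepath, {
    dryRun: true,
    verbose: false,
    analysis,
    validateOnly: true,
    prMode: true
  })

  if (!result.isValid) {
    console.log(`${filepath}:`)
    result.errors.forEach(error => console.log(`  → ${error}`))
    if (result.suggestions?.categories?.length) {
      console.log(`  suggested categories: ${result.suggestions.categories.join(', ')}`)
    }
  }
}

checkOne('pages/superchain/networks.mdx').catch(console.error)
```

Because `updateMetadata` never exits the process itself, a wrapper like this stays non-blocking, which matches the PR's intent of reporting metadata problems as warnings rather than failing CI.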