diff --git a/.github/scripts/bulk_find_replace.py b/.github/scripts/bulk_find_replace.py
new file mode 100755
index 0000000..2169ac4
--- /dev/null
+++ b/.github/scripts/bulk_find_replace.py
@@ -0,0 +1,383 @@
+#!/usr/bin/env python3
+"""
+Bulk find and replace script for GitHub organization repositories.
+Replaces:
+- hybrid-cloud-patterns.io -> validatedpatterns.io (except in apiVersion lines)
+- mailto: references -> team-validatedpatterns
+"""
+
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from github import Github
+from github.GithubException import GithubException
+
+
+OLD_DOMAIN = 'hybrid-cloud-patterns.io'
+NEW_DOMAIN = 'validatedpatterns.io'
+# NOTE(review): the replacement has no @domain part; confirm this is intended.
+MAILTO_REPLACEMENT = 'mailto:team-validatedpatterns'
+
+# Lines that reference the old domain as an apiVersion are left untouched.
+API_VERSION_RE = re.compile(r'apiVersion:\s*hybrid-cloud-patterns\.io', re.IGNORECASE)
+# Any mailto: target that is not already the team alias. The negative lookahead
+# lets a single line mix already-migrated and not-yet-migrated links.
+MAILTO_RE = re.compile(r'mailto:(?!team-validatedpatterns)[^\s<>"\'\)]+', re.IGNORECASE)
+
+# Binary and other non-text file types that are never rewritten.
+SKIP_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.pdf',
+                   '.zip', '.tar', '.gz', '.bz2', '.woff', '.woff2', '.ttf',
+                   '.eot', '.otf', '.mp4', '.mp3', '.avi', '.mov')
+
+# Directories whose contents are never rewritten.
+SKIP_DIRS = frozenset({'.git', 'node_modules', '.venv', 'venv', '__pycache__',
+                       '.pytest_cache', 'dist', 'build', '.tox', '.eggs'})
+
+
+def clone_repo(repo_url, temp_dir):
+    """Clone ``repo_url`` into ``temp_dir`` and return the clone's path.
+
+    Any existing directory at the target path is removed first.
+    Raises ``subprocess.CalledProcessError`` if ``git clone`` fails.
+    """
+    repo_name = repo_url.rstrip('/').split('/')[-1]
+    # Strip only a trailing ".git"; str.replace would also mangle names such
+    # as "example.github.io".
+    if repo_name.endswith('.git'):
+        repo_name = repo_name[:-len('.git')]
+    clone_path = os.path.join(temp_dir, repo_name)
+
+    if os.path.exists(clone_path):
+        shutil.rmtree(clone_path)
+
+    subprocess.run(['git', 'clone', repo_url, clone_path], check=True)
+    return clone_path
+
+
+def _authenticated_url(clone_url, token):
+    """Return an HTTPS ``clone_url`` with ``token`` embedded as credentials.
+
+    The clones live outside the workflow checkout, so git has no stored
+    credentials for them; without this, private clones and every push fail.
+    Non-HTTPS URLs are returned unchanged.
+    """
+    prefix = 'https://'
+    if not clone_url.startswith(prefix):
+        return clone_url
+    return f'{prefix}x-access-token:{token}@{clone_url[len(prefix):]}'
+
+
+def should_process_file(file_path):
+    """Return False for files in skipped directories or with skipped extensions.
+
+    Pass a path relative to the repository root so that directories above
+    the clone (for example a temp dir named ``build``) cannot cause a skip.
+    """
+    path = Path(file_path)
+    # Compare whole directory components; a prefix test would also skip
+    # files such as "build.gradle" or "distribution.md".
+    if SKIP_DIRS.intersection(path.parts[:-1]):
+        return False
+    return not path.name.lower().endswith(SKIP_EXTENSIONS)
+
+
+def _rewrite_line(line):
+    """Return ``line`` with the domain and mailto replacements applied."""
+    if API_VERSION_RE.search(line):
+        return line
+    line = line.replace(OLD_DOMAIN, NEW_DOMAIN)
+    return MAILTO_RE.sub(MAILTO_REPLACEMENT, line)
+
+
+def replace_in_file(file_path, dry_run=False):
+    """Apply the replacements to one file.
+
+    Returns True if the file needs changes (they are written unless
+    ``dry_run`` is set) and False if it is unchanged, is not valid UTF-8,
+    or cannot be read or written.
+    """
+    try:
+        # newline='' disables newline translation so CRLF files keep their
+        # line endings when written back.
+        with open(file_path, 'r', encoding='utf-8', newline='') as f:
+            content = f.read()
+    except UnicodeDecodeError:
+        # Most likely binary. Decoding with errors='ignore' and writing the
+        # result back would silently drop bytes, so leave the file alone.
+        return False
+    except OSError as e:
+        print(f"Error processing {file_path}: {e}")
+        return False
+
+    new_content = '\n'.join(_rewrite_line(line) for line in content.split('\n'))
+    if new_content == content:
+        return False
+
+    if not dry_run:
+        try:
+            with open(file_path, 'w', encoding='utf-8', newline='') as f:
+                f.write(new_content)
+        except OSError as e:
+            print(f"Error processing {file_path}: {e}")
+            return False
+    return True
+
+
+def process_repository(repo_path, dry_run=False):
+    """Rewrite every eligible file under ``repo_path``.
+
+    Returns the repo-relative paths of the files that changed (or would
+    change, when ``dry_run`` is set).
+    """
+    changes = []
+
+    for root, dirs, files in os.walk(repo_path):
+        # Prune in place so os.walk never descends into skipped directories.
+        dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
+
+        for file in files:
+            file_path = Path(root) / file
+            rel_path = file_path.relative_to(repo_path)
+
+            # A symlink's target may live outside the clone; never write through one.
+            if file_path.is_symlink() or not should_process_file(rel_path):
+                continue
+
+            if replace_in_file(file_path, dry_run):
+                changes.append(str(rel_path))
+
+    return changes
+
+
+def get_default_branch(repo_path):
+    """Return the default branch name of the clone at ``repo_path``.
+
+    Reads ``origin/HEAD``; if that fails, returns the first of main, master,
+    develop that exists on origin, and finally falls back to 'main'.
+    """
+    try:
+        result = subprocess.run(['git', 'symbolic-ref', 'refs/remotes/origin/HEAD'],
+                                cwd=repo_path, capture_output=True, text=True, check=True)
+        default_branch = result.stdout.strip().replace('refs/remotes/origin/', '')
+        return default_branch
+    except subprocess.CalledProcessError:
+        # Try common branch names
+        for branch in ['main', 'master', 'develop']:
+            result = subprocess.run(['git', 'show-ref', f'refs/remotes/origin/{branch}'],
+                                    cwd=repo_path, capture_output=True)
+            if result.returncode == 0:
+                return branch
+        return 'main'  # Default fallback
+
+
+def create_branch_and_pr(repo, repo_path, changes, dry_run=False):
+    """Commit the working-tree changes on a new branch, push it and open a PR.
+
+    Returns the created pull request, or None when there is nothing to do,
+    the branch already exists, ``dry_run`` is set, or a git/GitHub call fails.
+    """
+    if not changes:
+        print(f"No changes needed in {repo.name}")
+        return None
+
+    branch_name = 'bulk-find-replace-update'
+    default_branch = get_default_branch(repo_path)
+
+    try:
+        # Skip repositories that already have the branch.
+        try:
+            repo.get_branch(branch_name)
+        except GithubException as e:
+            # Only "not found" means the branch is absent; any other API
+            # failure goes to the outer handler instead of being ignored.
+            if e.status != 404:
+                raise
+        else:
+            print(f"Branch {branch_name} already exists in {repo.name}, skipping")
+            return None
+
+        if dry_run:
+            print(f"[DRY RUN] Would create branch and PR for {repo.name} with {len(changes)} changed files")
+            return None
+
+        # Ensure we're on the default branch
+        subprocess.run(['git', 'checkout', default_branch], cwd=repo_path, check=True)
+
+        # Create and checkout branch
+        subprocess.run(['git', 'checkout', '-b', branch_name], cwd=repo_path, check=True)
+
+        # Stage all changes
+        subprocess.run(['git', 'add', '-A'], cwd=repo_path, check=True)
+
+        # "git diff --cached --quiet" exits 0 when nothing is staged.
+        result = subprocess.run(['git', 'diff', '--cached', '--quiet'], cwd=repo_path)
+        if result.returncode == 0:
+            print(f"No changes to commit in {repo.name}")
+            subprocess.run(['git', 'checkout', default_branch], cwd=repo_path)
+            return None
+
+        # Commit changes
+        commit_message = """Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io
+
+- Replaced hybrid-cloud-patterns.io with validatedpatterns.io (excluding apiVersion references)
+- Updated mailto: references to team-validatedpatterns
+
+This PR was automatically generated by the bulk find and replace workflow."""
+
+        subprocess.run(['git', 'commit', '-m', commit_message], cwd=repo_path, check=True)
+
+        # Push branch
+        subprocess.run(['git', 'push', 'origin', branch_name], cwd=repo_path, check=True)
+
+        # Create PR
+        pr_title = "Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io"
+        pr_body = f"""This PR updates references from `hybrid-cloud-patterns.io` to `validatedpatterns.io` and updates mailto references.
+
+**Changes:**
+- Replaced `hybrid-cloud-patterns.io` with `validatedpatterns.io` (excluding `apiVersion` references)
+- Updated `mailto:` references to `team-validatedpatterns`
+
+**Files changed:** {len(changes)}
+
+This PR was automatically generated by the bulk find and replace workflow."""
+
+        pr = repo.create_pull(
+            title=pr_title,
+            body=pr_body,
+            head=branch_name,
+            base=default_branch
+        )
+
+        print(f"Created PR #{pr.number} in {repo.name}: {pr.html_url}")
+        return pr
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error creating branch/PR for {repo.name}: {e}")
+        return None
+    except GithubException as e:
+        print(f"GitHub API error for {repo.name}: {e}")
+        return None
+
+
+def main():
+    """Run the bulk replacement, configured by environment variables.
+
+    Reads GITHUB_TOKEN and ORGANIZATION (required), REPOSITORIES (optional
+    comma-separated repository names) and DRY_RUN ('true' to only report).
+    """
+    github_token = os.environ.get('GITHUB_TOKEN')
+    organization = os.environ.get('ORGANIZATION')
+    repositories_input = os.environ.get('REPOSITORIES', '').strip()
+    dry_run = os.environ.get('DRY_RUN', 'false').lower() == 'true'
+
+    if not github_token:
+        print("Error: GITHUB_TOKEN environment variable is required")
+        sys.exit(1)
+
+    if not organization:
+        print("Error: ORGANIZATION environment variable is required")
+        sys.exit(1)
+
+    g = Github(github_token)
+    org = g.get_organization(organization)
+
+    # Parse repository list if provided
+    selected_repos = None
+    if repositories_input:
+        selected_repos = [r.strip() for r in repositories_input.split(',') if r.strip()]
+        print(f"Processing selected repositories: {', '.join(selected_repos)}")
+    else:
+        print(f"Processing all repositories in organization: {organization}")
+
+    print(f"Dry run mode: {dry_run}")
+
+    # A private, uniquely named directory avoids clashing with (or deleting)
+    # anything that already exists at a fixed path under /tmp.
+    temp_dir = tempfile.mkdtemp(prefix='bulk_find_replace_')
+
+    repos_processed = 0
+    repos_with_changes = 0
+    prs_created = 0
+    repos_skipped = 0
+    found_repos = set()
+
+    try:
+        for repo in org.get_repos():
+            # Filter by selected repositories if provided
+            if selected_repos:
+                if repo.name not in selected_repos:
+                    repos_skipped += 1
+                    continue
+                found_repos.add(repo.name)
+            repos_processed += 1
+            print(f"\n[{repos_processed}] Processing {repo.name}...")
+
+            # Skip archived repositories
+            if repo.archived:
+                print(" Skipping archived repository")
+                continue
+
+            # Skip empty repositories
+            if repo.size == 0:
+                print(" Skipping empty repository")
+                continue
+
+            repo_path = None
+            try:
+                # Clone with the token embedded so the later push is authenticated.
+                repo_path = clone_repo(_authenticated_url(repo.clone_url, github_token), temp_dir)
+
+                # Configure git user for commits
+                subprocess.run(['git', 'config', 'user.name', 'github-actions[bot]'],
+                               cwd=repo_path, check=True)
+                subprocess.run(['git', 'config', 'user.email',
+                                'github-actions[bot]@users.noreply.github.com'],
+                               cwd=repo_path, check=True)
+
+                # Process files
+                changes = process_repository(repo_path, dry_run)
+
+                if changes:
+                    repos_with_changes += 1
+                    print(f" Found {len(changes)} files with changes")
+
+                    # Create branch and PR
+                    pr = create_branch_and_pr(repo, repo_path, changes, dry_run)
+                    if pr:
+                        prs_created += 1
+                else:
+                    print(" No changes needed")
+
+            except Exception as e:
+                # Keep going with the next repository. The failed command may
+                # include the clone URL, so redact the token before printing.
+                print(f" Error processing {repo.name}: {str(e).replace(github_token, '***')}")
+            finally:
+                # Drop each clone right away so disk usage stays bounded.
+                if repo_path:
+                    shutil.rmtree(repo_path, ignore_errors=True)
+
+    finally:
+        # Cleanup
+        shutil.rmtree(temp_dir, ignore_errors=True)
+
+    print(f"\n{'='*60}")
+    print("Summary:")
+    print(f" Repositories processed: {repos_processed}")
+    if selected_repos:
+        print(f" Repositories skipped (not in selection): {repos_skipped}")
+        not_found = set(selected_repos) - found_repos
+        if not_found:
+            print(f" WARNING: Selected repositories not found: {', '.join(sorted(not_found))}")
+    print(f" Repositories with changes: {repos_with_changes}")
+    print(f" PRs created: {prs_created}")
+    print(f"{'='*60}")
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
new file mode 100644
index 0000000..09b1eb9
--- /dev/null
+++ b/.github/scripts/requirements.txt
@@ -0,0 +1,2 @@
+PyGithub>=2.1.1
+
diff --git a/.github/workflows/bulk-find-replace.yml b/.github/workflows/bulk-find-replace.yml
new file mode 100644
index 0000000..83c8d43
--- /dev/null
+++ b/.github/workflows/bulk-find-replace.yml
@@ -0,0 +1,51 @@
+name: Bulk Find and Replace Across Organization
+
+on:
+  workflow_dispatch:
+    inputs:
+      organization:
+        description: 'GitHub organization name'
+        required: true
+        type: string
+      repositories:
+        description: 'Comma-separated list of repository names to process (leave empty for all repos).
Example: repo1,repo2,repo3'
+        required: false
+        type: string
+        default: ''
+      dry_run:
+        description: 'Dry run mode (no PRs created)'
+        required: false
+        default: 'false'
+        type: choice
+        options:
+          - 'true'
+          - 'false'
+
+jobs:
+  find-and-replace:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          pip install -r .github/scripts/requirements.txt
+
+      - name: Run find and replace script
+        env:
+          GITHUB_TOKEN: ${{ secrets.ORG_ADMIN_TOKEN }}  # org-scoped token; the default GITHUB_TOKEN only reaches this repository
+          ORGANIZATION: ${{ inputs.organization }}
+          REPOSITORIES: ${{ inputs.repositories }}
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          python .github/scripts/bulk_find_replace.py
+
diff --git a/.github/workflows/org-wide-migration.yaml b/.github/workflows/org-wide-migration.yaml
new file mode 100644
index 0000000..e16c72f
--- /dev/null
+++ b/.github/workflows/org-wide-migration.yaml
@@ -0,0 +1,211 @@
+name: Org-Wide Domain Migration
+
+on:
+  workflow_dispatch:
+    inputs:
+      organization:
+        description: 'The GitHub Organization name'
+        required: true
+      repo_filter:
+        description: 'Specific repositories to target (comma-separated). Leave empty for ALL.'
+        required: false
+        type: string
+      mode:
+        description: 'Choose Action Mode'
+        required: true
+        default: 'apply'
+        type: choice
+        options:
+          - apply
+          - revert
+      dry_run:
+        description: 'Dry Run (logs only, no push)'
+        required: true
+        default: 'false'
+        type: boolean
+
+jobs:
+  manage-migration:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Controller Repo
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: Initialize Report
+        run: |
+          echo "# Domain Migration Report (${{ inputs.mode }})" > migration_summary.md
+          echo "**User:** ${{ github.actor }} | **Mode:** ${{ inputs.mode }}" >> migration_summary.md
+          echo "" >> migration_summary.md
+          echo "| Repository | Status | Detail |" >> migration_summary.md
+          echo "| :--- | :--- | :--- |" >> migration_summary.md
+
+      - name: Process Repositories
+        env:
+          GH_TOKEN: ${{ secrets.ORG_ADMIN_TOKEN }}
+          ORG_NAME: ${{ inputs.organization }}
+          REPO_FILTER: ${{ inputs.repo_filter }}
+          MODE: ${{ inputs.mode }}
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          # 1. Fetch all repos (we filter later in the loop)
+          echo "Fetching repositories for $ORG_NAME..."
+          repos=$(gh repo list "$ORG_NAME" --limit 2000 --json name --jq '.[] | .name')
+
+          # 2. Define the Python Script (Only needed for Apply mode)
+          cat << 'EOF' > process_repo.py
+          import os
+          import re
+          import sys
+
+          # --- CONFIGURATION ---
+          OLD_DOMAIN = 'hybrid-cloud-patterns.io'
+          NEW_DOMAIN = 'validatedpatterns.io'
+          NEW_TEAM_ALIAS = 'team-validatedpatterns'
+          IGNORE_MARKER = 'apiVersion: hybrid-cloud-patterns.io'
+
+          mailto_pattern = re.compile(r'(mailto:)([a-zA-Z0-9._%+-]+)(@' + re.escape(OLD_DOMAIN) + r')')
+
+          files_changed = 0
+          extensions_to_check = ('.yaml', '.yml', '.md', '.txt', '.json', '.sh', '.html', '.rst')
+
+          for root, dirs, files in os.walk('.'):
+              if '.git' in dirs: dirs.remove('.git')
+              for file in files:
+                  if not file.endswith(extensions_to_check): continue
+                  file_path = os.path.join(root, file)
+                  if os.path.islink(file_path): continue  # never write through a symlink
+                  try:
+                      # newline='' keeps CRLF line endings intact when the file is rewritten.
+                      with open(file_path, 'r', encoding='utf-8', newline='') as f: lines = f.readlines()
+                  except (OSError, UnicodeDecodeError): continue
+
+                  new_lines = []
+                  file_modified = False
+                  for line in lines:
+                      if IGNORE_MARKER in line:
+                          new_lines.append(line)
+                          continue
+
+                      original_line = line
+                      if 'mailto:' in line and OLD_DOMAIN in line:
+                          line = mailto_pattern.sub(r'\1' + NEW_TEAM_ALIAS + '@' + NEW_DOMAIN, line)
+                      if OLD_DOMAIN in line:
+                          line = line.replace(OLD_DOMAIN, NEW_DOMAIN)
+                      if line != original_line:
+                          file_modified = True
+                      new_lines.append(line)
+
+                  if file_modified:
+                      with open(file_path, 'w', encoding='utf-8', newline='') as f: f.writelines(new_lines)
+                      files_changed += 1
+
+          sys.exit(0 if files_changed > 0 else 1)
+          EOF
+
+          # 3. Loop through repositories
+          BRANCH_NAME="migration/update-domain-references"
+          # Tolerate spaces after the commas in the repo_filter input.
+          REPO_FILTER="${REPO_FILTER// /}"
+
+          for NAME in $repos; do
+
+            # --- FILTER LOGIC ---
+            # If repo_filter is set, skip if NAME is not in the list
+            if [[ -n "$REPO_FILTER" ]]; then
+              if [[ ",$REPO_FILTER," != *",$NAME,"* ]]; then
+                continue
+              fi
+            fi
+
+            echo "------------------------------------------------"
+            echo "Processing: $NAME"
+
+            # Clone. The step runs under "bash -e"; guard the clone so one
+            # unreachable repository does not abort the remaining ones.
+            if ! git clone "https://x-access-token:${GH_TOKEN}@github.com/$ORG_NAME/$NAME.git" work_dir; then
+              echo "| $NAME | ❌ Failed | Clone failed |" >> migration_summary.md
+              rm -rf work_dir
+              continue
+            fi
+            cd work_dir
+
+            if [ "$MODE" == "revert" ]; then
+              # --- REVERT MODE ---
+              echo "Attempting to revert changes in $NAME..."
+
+              # Check if PR exists
+              PR_LIST=$(gh pr list --head "$BRANCH_NAME" --json number --jq '.[].number')
+
+              if [ -n "$PR_LIST" ]; then
+                if [ "$DRY_RUN" = "false" ]; then
+                  for pr_num in $PR_LIST; do
+                    gh pr close "$pr_num" --delete-branch --comment "Closing via automated cleanup."
+                  done
+                  echo "| $NAME | 🧹 Reverted | Closed PRs and deleted branch |" >> ../migration_summary.md
+                else
+                  echo "| $NAME | ⚠️ Dry Run | Would close PRs and delete branch |" >> ../migration_summary.md
+                fi
+              else
+                # Attempt to delete branch even if no PR exists
+                if git ls-remote --exit-code --heads origin "$BRANCH_NAME"; then
+                  if [ "$DRY_RUN" = "false" ]; then
+                    git push origin --delete "$BRANCH_NAME"
+                    echo "| $NAME | 🧹 Cleaned | Branch deleted (No PR found) |" >> ../migration_summary.md
+                  else
+                    echo "| $NAME | ⚠️ Dry Run | Would delete orphan branch |" >> ../migration_summary.md
+                  fi
+                else
+                  echo "| $NAME | 💨 Skipped | No PR or Branch found |" >> ../migration_summary.md
+                fi
+              fi
+
+            else
+              # --- APPLY MODE ---
+              git checkout -b "$BRANCH_NAME" || git checkout "$BRANCH_NAME"
+
+              # Run Python. It exits 1 when nothing changed; capture the status
+              # with "||" so "bash -e" does not abort the whole step.
+              CHANGES_MADE=0
+              python3 ../process_repo.py || CHANGES_MADE=$?
+
+              if [ $CHANGES_MADE -eq 0 ]; then
+                if [ "$DRY_RUN" = "false" ]; then
+                  git config user.name "Migration Bot"
+                  git config user.email "migration-bot@validatedpatterns.io"
+                  git add .
+                  git commit -m "refactor: migrate hybrid-cloud-patterns.io to validatedpatterns.io"
+                  git push origin "$BRANCH_NAME" --force
+
+                  # No --base: gh targets the repository default branch, which is not always "main".
+                  PR_URL=$(gh pr create --title "Migrate domain references" --body "Automated migration." --head "$BRANCH_NAME" 2>/dev/null || echo "PR already exists")
+                  echo "| $NAME | ✅ Migrated | $PR_URL |" >> ../migration_summary.md
+                else
+                  echo "| $NAME | ⚠️ Dry Run | Changes detected |" >> ../migration_summary.md
+                fi
+              else
+                echo "| $NAME | 💤 Skipped | No patterns found |" >> ../migration_summary.md
+              fi
+            fi
+
+            # Cleanup local dir
+            cd ..
+            rm -rf work_dir
+          done
+
+      - name: Create Summary Issue
+        if: always()
+        env:
+          GH_TOKEN: ${{ secrets.ORG_ADMIN_TOKEN }}
+          # Inputs are passed through env: MODE was never defined in this step,
+          # and expanding inputs inline in the script allows shell injection.
+          ORG_NAME: ${{ inputs.organization }}
+          MODE: ${{ inputs.mode }}
+        run: |
+          if [ -f migration_summary.md ]; then
+            gh issue create --repo "$GITHUB_REPOSITORY" --title "Migration Report: $ORG_NAME ($MODE)" --body-file migration_summary.md
+          fi