|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Bulk find and replace script for GitHub organization repositories. |
| 4 | +Replaces: |
| 5 | +- hybrid-cloud-patterns.io -> validatedpatterns.io (except in apiVersion lines) |
| 6 | +- mailto: references -> team-validatedpatterns |
| 7 | +""" |
| 8 | + |
| 9 | +import os |
| 10 | +import re |
| 11 | +import subprocess |
| 12 | +import sys |
| 13 | +from pathlib import Path |
| 14 | +from github import Github |
| 15 | +from github.GithubException import GithubException |
| 16 | + |
| 17 | + |
def clone_repo(repo_url, temp_dir):
    """Clone a repository into ``temp_dir`` and return the checkout path.

    The checkout directory is named after the last path segment of
    ``repo_url`` with a single trailing ``.git`` removed. Anything already
    present at that location is deleted first so the clone starts clean.

    Args:
        repo_url: URL (or local path) accepted by ``git clone``.
        temp_dir: Directory under which the checkout is created.

    Returns:
        Path of the fresh checkout, ``<temp_dir>/<repository name>``.

    Raises:
        ValueError: If no repository name can be derived from ``repo_url``.
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        OSError: If a stale checkout cannot be removed.
    """
    import shutil  # local import keeps this block independent of the file header

    # Strip only a *trailing* ".git"; names such as "site.github.io" contain
    # ".git" in the middle and must be left intact.
    repo_name = repo_url.rstrip('/').split('/')[-1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        # An empty name would make clone_path equal temp_dir and the cleanup
        # below would wipe the whole working area.
        raise ValueError(f"Cannot derive a repository name from {repo_url!r}")

    clone_path = os.path.join(temp_dir, repo_name)

    if os.path.lexists(clone_path):
        if os.path.isdir(clone_path) and not os.path.islink(clone_path):
            shutil.rmtree(clone_path)
        else:
            os.remove(clone_path)

    subprocess.run(['git', 'clone', repo_url, clone_path], check=True)
    return clone_path
| 28 | + |
| 29 | + |
def should_process_file(file_path):
    """Decide whether a file is eligible for find-and-replace.

    A file is rejected when any *directory component* of its path is one of
    the well-known tooling/VCS directories, or when its name ends with one of
    the listed binary/media extensions (case-insensitive).

    Args:
        file_path: ``str`` or ``pathlib.Path`` of the candidate file. Every
            directory component of the path as given is checked.

    Returns:
        ``True`` if the file should be processed, ``False`` otherwise.
    """
    # Binary and other non-text formats. A tuple so it can be passed straight
    # to str.endswith.
    skip_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.pdf',
                       '.zip', '.tar', '.gz', '.bz2', '.woff', '.woff2', '.ttf',
                       '.eot', '.otf', '.mp4', '.mp3', '.avi', '.mov')

    # Directories whose contents are generated, vendored or VCS metadata.
    skip_dirs = {'.git', 'node_modules', '.venv', 'venv', '__pycache__',
                 '.pytest_cache', 'dist', 'build', '.tox', '.eggs'}

    # Compare whole path components rather than string prefixes, so that
    # "builder.py", ".gitignore" or ".github/..." are not mistaken for the
    # "build" / ".git" directories. The final component is the file name
    # itself and is therefore excluded.
    parent_parts = Path(file_path).parts[:-1]
    if any(part in skip_dirs for part in parent_parts):
        return False

    return not str(file_path).lower().endswith(skip_extensions)
| 53 | + |
| 54 | + |
# Line terminators are captured so they can be written back unchanged.
_NEWLINE_RE = re.compile(r'(\r\n|\r|\n)')
# Lines pinning the Kubernetes API group must keep the old domain.
_API_VERSION_RE = re.compile(r'apiVersion:\s*hybrid-cloud-patterns\.io', re.IGNORECASE)
# Any mailto: target except one that already points at the team alias.
_MAILTO_RE = re.compile(r'mailto:(?!team-validatedpatterns)[^\s<>"\'\)]+', re.IGNORECASE)


def _rewrite_line(line):
    """Return ``line`` with the domain and mailto replacements applied."""
    if _API_VERSION_RE.search(line):
        return line
    line = line.replace('hybrid-cloud-patterns.io', 'validatedpatterns.io')
    return _MAILTO_RE.sub('mailto:team-validatedpatterns', line)


def replace_in_file(file_path, dry_run=False):
    """Apply the bulk replacements to one file.

    Replacements, applied line by line:
      * ``hybrid-cloud-patterns.io`` -> ``validatedpatterns.io``, except on
        lines matching ``apiVersion: hybrid-cloud-patterns.io``, which are
        left completely untouched;
      * every ``mailto:<target>`` -> ``mailto:team-validatedpatterns``,
        unless that particular link already points there.

    Bytes that are not valid UTF-8 and the file's existing line endings are
    written back exactly as they were read, so only the intended text
    differs afterwards.

    Args:
        file_path: Path of the file to rewrite.
        dry_run: When true, report whether the file would change but do not
            write to it.

    Returns:
        ``True`` if the file changed (or would change in dry-run mode),
        ``False`` if nothing matched or the file could not be processed.
    """
    try:
        # newline='' disables newline translation; surrogateescape lets
        # undecodable bytes survive the read/write round trip instead of
        # being silently dropped.
        with open(file_path, 'r', encoding='utf-8',
                  errors='surrogateescape', newline='') as f:
            content = f.read()

        # re.split with a capturing group yields line, separator, line, ...;
        # the even slots are the line bodies.
        pieces = _NEWLINE_RE.split(content)
        pieces[::2] = [_rewrite_line(piece) for piece in pieces[::2]]
        new_content = ''.join(pieces)

        if new_content == content:
            return False

        if not dry_run:
            with open(file_path, 'w', encoding='utf-8',
                      errors='surrogateescape', newline='') as f:
                f.write(new_content)
        return True
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not abort
        # the whole repository run.
        print(f"Error processing {file_path}: {e}")
        return False
| 108 | + |
| 109 | + |
def process_repository(repo_path, dry_run=False):
    """Run the replacements over every eligible file below ``repo_path``.

    Args:
        repo_path: Root directory of the checked-out repository.
        dry_run: Forwarded to ``replace_in_file``; when true no file is
            written.

    Returns:
        List of paths, relative to ``repo_path``, for which
        ``replace_in_file`` reported a change.
    """
    modified = []

    for current_dir, subdirs, filenames in os.walk(repo_path):
        # Prune in place so os.walk never descends into git metadata.
        subdirs[:] = [name for name in subdirs if name != '.git']

        for filename in filenames:
            candidate = Path(current_dir) / filename
            if should_process_file(candidate) and replace_in_file(candidate, dry_run):
                modified.append(str(candidate.relative_to(repo_path)))

    return modified
| 129 | + |
| 130 | + |
def get_default_branch(repo_path):
    """Return the name of the checkout's default branch.

    The symbolic ref ``refs/remotes/origin/HEAD`` is consulted first. If git
    cannot resolve it, the remote-tracking refs for ``main``, ``master`` and
    ``develop`` are probed in that order, and ``'main'`` is returned when
    none of them exists.

    Args:
        repo_path: Directory in which the git commands are run.

    Returns:
        The branch name as a string.
    """
    remote_prefix = 'refs/remotes/origin/'

    try:
        head = subprocess.run(
            ['git', 'symbolic-ref', 'refs/remotes/origin/HEAD'],
            cwd=repo_path, capture_output=True, text=True, check=True,
        )
    except subprocess.CalledProcessError:
        pass  # origin/HEAD is not set; fall through to probing known names
    else:
        return head.stdout.strip().replace(remote_prefix, '')

    for candidate in ('main', 'master', 'develop'):
        probe = subprocess.run(
            ['git', 'show-ref', f'refs/remotes/origin/{candidate}'],
            cwd=repo_path, capture_output=True,
        )
        if probe.returncode == 0:
            return candidate

    # Nothing matched: assume the conventional default.
    return 'main'
| 146 | + |
| 147 | + |
def _remote_branch_exists(repo, branch_name):
    """Return whether ``branch_name`` exists on the GitHub repository.

    Only an HTTP 404 from the API is taken to mean "no such branch"; any
    other API failure is re-raised so it is not mistaken for absence.
    """
    try:
        return bool(repo.get_branch(branch_name))
    except GithubException as exc:
        if exc.status == 404:
            return False
        raise


def create_branch_and_pr(repo, repo_path, changes, dry_run=False):
    """Create a branch, commit the working-tree changes, and open a PR.

    Args:
        repo: Repository API object; this function uses ``repo.name``,
            ``repo.get_branch`` and ``repo.create_pull``.
        repo_path: Local checkout in which the git commands are run.
        changes: Changed file paths. An empty list means nothing to do; only
            the count is used, in the PR body.
        dry_run: When true, report what would happen without touching git or
            creating a PR.

    Returns:
        The created pull-request object, or ``None`` when there was nothing
        to do, the branch already existed, this was a dry run, or a git or
        GitHub API error occurred (the error is printed).
    """
    if not changes:
        print(f"No changes needed in {repo.name}")
        return None

    branch_name = 'bulk-find-replace-update'
    default_branch = get_default_branch(repo_path)

    try:
        # An existing branch means a previous run already handled this repo.
        if _remote_branch_exists(repo, branch_name):
            print(f"Branch {branch_name} already exists in {repo.name}, skipping")
            return None

        if dry_run:
            print(f"[DRY RUN] Would create branch and PR for {repo.name} with {len(changes)} changed files")
            return None

        # Ensure we're on the default branch
        subprocess.run(['git', 'checkout', default_branch], cwd=repo_path, check=True)

        # Create and checkout branch
        subprocess.run(['git', 'checkout', '-b', branch_name], cwd=repo_path, check=True)

        # Stage all changes
        subprocess.run(['git', 'add', '-A'], cwd=repo_path, check=True)

        # `git diff --cached --quiet` exits 0 when the index matches HEAD,
        # i.e. there is nothing to commit.
        result = subprocess.run(['git', 'diff', '--cached', '--quiet'], cwd=repo_path)
        if result.returncode == 0:
            print(f"No changes to commit in {repo.name}")
            subprocess.run(['git', 'checkout', default_branch], cwd=repo_path)
            return None

        # Commit changes
        commit_message = """Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io

- Replaced hybrid-cloud-patterns.io with validatedpatterns.io (excluding apiVersion references)
- Updated mailto: references to team-validatedpatterns

This PR was automatically generated by the bulk find and replace workflow."""

        subprocess.run(['git', 'commit', '-m', commit_message], cwd=repo_path, check=True)

        # Push branch
        subprocess.run(['git', 'push', 'origin', branch_name], cwd=repo_path, check=True)

        # Create PR
        pr_title = "Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io"
        pr_body = f"""This PR updates references from `hybrid-cloud-patterns.io` to `validatedpatterns.io` and updates mailto references.

**Changes:**
- Replaced `hybrid-cloud-patterns.io` with `validatedpatterns.io` (excluding `apiVersion` references)
- Updated `mailto:` references to `team-validatedpatterns`

**Files changed:** {len(changes)}

This PR was automatically generated by the bulk find and replace workflow."""

        pr = repo.create_pull(
            title=pr_title,
            body=pr_body,
            head=branch_name,
            base=default_branch
        )

        print(f"Created PR #{pr.number} in {repo.name}: {pr.html_url}")
        return pr

    except subprocess.CalledProcessError as e:
        print(f"Error creating branch/PR for {repo.name}: {e}")
        return None
    except GithubException as e:
        print(f"GitHub API error for {repo.name}: {e}")
        return None
| 228 | + |
| 229 | + |
def main():
    """Run the bulk find-and-replace across an organization's repositories.

    Configuration is read from environment variables:
      * ``GITHUB_TOKEN`` (required) - token passed to the GitHub client.
      * ``ORGANIZATION`` (required) - organization whose repositories are
        listed.
      * ``REPOSITORIES`` (optional) - comma-separated repository names; when
        empty, every repository in the organization is processed.
      * ``DRY_RUN`` (optional) - ``true`` (case-insensitive) to report
        changes without writing files, pushing, or opening PRs.

    Exits with status 1 if a required variable is missing. Archived and
    empty repositories are skipped. A failure in one repository is printed
    and does not stop the run. A summary is printed at the end.
    """
    import shutil
    import tempfile

    github_token = os.environ.get('GITHUB_TOKEN')
    organization = os.environ.get('ORGANIZATION')
    repositories_input = os.environ.get('REPOSITORIES', '').strip()
    dry_run = os.environ.get('DRY_RUN', 'false').lower() == 'true'

    if not github_token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)

    if not organization:
        print("Error: ORGANIZATION environment variable is required")
        sys.exit(1)

    g = Github(github_token)
    org = g.get_organization(organization)

    # Parse repository list if provided
    selected_repos = None
    selected_lookup = set()
    if repositories_input:
        selected_repos = [r.strip() for r in repositories_input.split(',') if r.strip()]
        selected_lookup = set(selected_repos)
        print(f"Processing selected repositories: {', '.join(selected_repos)}")
    else:
        print(f"Processing all repositories in organization: {organization}")

    print(f"Dry run mode: {dry_run}")

    # A unique, privately created directory: a fixed path under /tmp can
    # collide with a concurrent run and would be deleted wholesale on exit.
    temp_dir = tempfile.mkdtemp(prefix='bulk_find_replace_')

    repos_processed = 0
    repos_with_changes = 0
    prs_created = 0
    repos_skipped = 0
    found_repos = set()

    try:
        for repo in org.get_repos():
            # Filter by selected repositories if provided
            if selected_repos:
                if repo.name not in selected_lookup:
                    repos_skipped += 1
                    continue
                found_repos.add(repo.name)

            repos_processed += 1
            print(f"\n[{repos_processed}] Processing {repo.name}...")

            # Skip archived repositories
            if repo.archived:
                print("  Skipping archived repository")
                continue

            # Skip empty repositories
            if repo.size == 0:
                print("  Skipping empty repository")
                continue

            repo_path = None
            try:
                # Clone repository
                repo_path = clone_repo(repo.clone_url, temp_dir)

                # Configure git user for commits
                subprocess.run(['git', 'config', 'user.name', 'github-actions[bot]'],
                               cwd=repo_path, check=True)
                subprocess.run(['git', 'config', 'user.email',
                                'github-actions[bot]@users.noreply.github.com'],
                               cwd=repo_path, check=True)

                # Process files
                changes = process_repository(repo_path, dry_run)

                if changes:
                    repos_with_changes += 1
                    print(f"  Found {len(changes)} files with changes")

                    # Create branch and PR
                    pr = create_branch_and_pr(repo, repo_path, changes, dry_run)
                    if pr:
                        prs_created += 1
                else:
                    print("  No changes needed")

            except Exception as e:
                # Per-repository boundary: report and move on to the next one.
                print(f"  Error processing {repo.name}: {e}")
                continue
            finally:
                # Drop the checkout right away so disk usage does not grow
                # with the number of repositories in the organization.
                if repo_path is not None:
                    shutil.rmtree(repo_path, ignore_errors=True)

    finally:
        # Best-effort cleanup of the whole working area.
        shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"\n{'='*60}")
    print("Summary:")
    print(f"  Repositories processed: {repos_processed}")
    if selected_repos:
        print(f"  Repositories skipped (not in selection): {repos_skipped}")
        not_found = selected_lookup - found_repos
        if not_found:
            print(f"  WARNING: Selected repositories not found: {', '.join(sorted(not_found))}")
    print(f"  Repositories with changes: {repos_with_changes}")
    print(f"  PRs created: {prs_created}")
    print(f"{'='*60}")


if __name__ == '__main__':
    main()
| 336 | + |
0 commit comments