Skip to content

Commit f3e7ce8

Browse files
committed
add domain search workflow
1 parent 68a0989 commit f3e7ce8

File tree

3 files changed

+389
-0
lines changed

3 files changed

+389
-0
lines changed
Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Bulk find and replace script for GitHub organization repositories.
4+
Replaces:
5+
- hybrid-cloud-patterns.io -> validatedpatterns.io (except in apiVersion lines)
6+
- mailto: references -> team-validatedpatterns
7+
"""
8+
9+
import os
10+
import re
11+
import subprocess
12+
import sys
13+
from pathlib import Path
14+
from github import Github
15+
from github.GithubException import GithubException
16+
17+
18+
def clone_repo(repo_url, temp_dir):
    """Clone a repository into ``temp_dir`` and return the clone's path.

    The clone directory is named after the last path segment of
    ``repo_url`` with a trailing ``.git`` removed. Anything already present
    at that path is deleted first so the clone starts from a clean state.

    Args:
        repo_url: URL (or local path) handed to ``git clone``.
        temp_dir: Existing directory in which the clone directory is created.

    Returns:
        Path of the freshly cloned working tree.

    Raises:
        ValueError: If no repository name can be derived from ``repo_url``.
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        OSError: If a stale clone directory cannot be removed.
    """
    import shutil  # local import: only this function needs it

    # Ignore trailing slashes so ".../repo/" does not yield an empty name,
    # which would make clone_path equal temp_dir and wipe it below.
    repo_name = repo_url.rstrip('/').split('/')[-1]
    # Strip only a trailing ".git"; str.replace() would also rewrite names
    # such as "docs.github.io" into "docshub.io".
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        raise ValueError(f"Cannot derive a repository name from {repo_url!r}")

    clone_path = os.path.join(temp_dir, repo_name)

    if os.path.isdir(clone_path) and not os.path.islink(clone_path):
        shutil.rmtree(clone_path)
    elif os.path.lexists(clone_path):
        os.remove(clone_path)

    subprocess.run(['git', 'clone', repo_url, clone_path], check=True)
    return clone_path
28+
29+
30+
def should_process_file(file_path):
    """Return True if ``file_path`` should be scanned for replacements.

    A file is rejected when any directory component of its path (ancestors
    included) is one of the well-known dependency/build/VCS directories, or
    when its name ends with a known binary/media extension (compared
    case-insensitively).

    Args:
        file_path: ``str`` or ``pathlib.Path``, absolute or relative.

    Returns:
        ``False`` for files that must be left alone, ``True`` otherwise.
    """
    # Binary and other non-text formats that must never be rewritten.
    skip_extensions = (
        '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.pdf',
        '.zip', '.tar', '.gz', '.bz2', '.woff', '.woff2', '.ttf',
        '.eot', '.otf', '.mp4', '.mp3', '.avi', '.mov',
    )

    # Directories holding VCS metadata, dependencies or build output.
    skip_dirs = {
        '.git', 'node_modules', '.venv', 'venv', '__pycache__',
        '.pytest_cache', 'dist', 'build', '.tox', '.eggs',
    }

    path = Path(file_path)

    # Compare whole path components. A plain string-prefix test would also
    # reject "build.gradle", ".gitignore" or ".github/workflows/ci.yml"
    # when the path is relative.
    if not skip_dirs.isdisjoint(path.parts[:-1]):
        return False

    return not path.name.lower().endswith(skip_extensions)
53+
54+
55+
# Lines matching this keep the old domain (the "except in apiVersion lines"
# rule from the module docstring).
_API_VERSION_RE = re.compile(
    r'apiVersion:\s*hybrid-cloud-patterns\.io', re.IGNORECASE)

# A mailto: target that does not already start with the team alias. The
# negative lookahead makes the substitution idempotent and lets other
# addresses on the same line still be rewritten.
_MAILTO_RE = re.compile(
    r'mailto:(?!team-validatedpatterns)[^\s<>"\'\)]+', re.IGNORECASE)


def replace_in_file(file_path, dry_run=False):
    """Apply the domain and mailto replacements to a single file.

    Every line is processed independently:

    * lines matching ``apiVersion: hybrid-cloud-patterns.io`` are left
      untouched;
    * ``hybrid-cloud-patterns.io`` becomes ``validatedpatterns.io``;
    * every ``mailto:<target>`` whose target does not already start with
      ``team-validatedpatterns`` becomes ``mailto:team-validatedpatterns``.

    The file is read as strict UTF-8 with newline translation disabled, so
    existing line endings (LF or CRLF) are written back unchanged. Files
    that are not valid UTF-8 are skipped instead of being rewritten with
    bytes silently dropped.

    Args:
        file_path: Path of the file to process.
        dry_run: When true, detect changes but do not write the file.

    Returns:
        ``True`` if the file was (or, in dry-run mode, would be) modified;
        ``False`` if nothing changed, the file is not UTF-8 text, or an I/O
        error occurred (I/O errors are reported on stdout).
    """
    try:
        # newline='' hands back the raw line terminators so they survive
        # the round trip.
        with open(file_path, 'r', encoding='utf-8', newline='') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Probably binary or a legacy encoding; rewriting would corrupt it.
        return False
    except OSError as e:
        print(f"Error processing {file_path}: {e}")
        return False

    new_lines = []
    # keepends=True keeps each terminator attached to its line, so joining
    # the pieces reproduces untouched lines byte-for-byte.
    for line in content.splitlines(keepends=True):
        if not _API_VERSION_RE.search(line):
            line = line.replace('hybrid-cloud-patterns.io', 'validatedpatterns.io')
            line = _MAILTO_RE.sub('mailto:team-validatedpatterns', line)
        new_lines.append(line)

    new_content = ''.join(new_lines)
    if new_content == content:
        return False

    if not dry_run:
        try:
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.write(new_content)
        except OSError as e:
            print(f"Error processing {file_path}: {e}")
            return False

    return True
108+
109+
110+
def process_repository(repo_path, dry_run=False):
    """Walk a cloned repository and apply the replacements to each file.

    The ``.git`` directory is never descended into. Every other file is
    offered to ``should_process_file`` and, if accepted, to
    ``replace_in_file``.

    Args:
        repo_path: Root directory of the cloned repository.
        dry_run: Forwarded to ``replace_in_file``; when true nothing is
            written to disk.

    Returns:
        List of paths (relative to ``repo_path``, as strings) for which
        ``replace_in_file`` reported a change.
    """
    modified = []

    for current_dir, subdirs, filenames in os.walk(repo_path):
        # Prune in place so os.walk does not descend into git metadata.
        subdirs[:] = [name for name in subdirs if name != '.git']

        base = Path(current_dir)
        for filename in filenames:
            candidate = base / filename
            if should_process_file(candidate) and replace_in_file(candidate, dry_run):
                modified.append(str(candidate.relative_to(repo_path)))

    return modified
129+
130+
131+
def get_default_branch(repo_path):
    """Return the name of the default branch of the clone at ``repo_path``.

    The answer is taken from the ``refs/remotes/origin/HEAD`` symbolic ref.
    If git cannot resolve it, the remote-tracking refs for ``main``,
    ``master`` and ``develop`` are probed in that order and the first one
    that exists is returned; ``'main'`` is the final fallback.
    """
    try:
        head = subprocess.run(
            ['git', 'symbolic-ref', 'refs/remotes/origin/HEAD'],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        # origin/HEAD is not set; fall through to probing common names.
        pass
    else:
        return head.stdout.strip().replace('refs/remotes/origin/', '')

    for candidate in ('main', 'master', 'develop'):
        probe = subprocess.run(
            ['git', 'show-ref', f'refs/remotes/origin/{candidate}'],
            cwd=repo_path,
            capture_output=True,
        )
        if probe.returncode == 0:
            return candidate

    return 'main'  # Default fallback
146+
147+
148+
def create_branch_and_pr(repo, repo_path, changes, dry_run=False):
    """Commit the working-tree changes on a new branch, push it, open a PR.

    Args:
        repo: PyGithub repository object; used for ``name``,
            ``get_branch`` and ``create_pull``.
        repo_path: Local clone of ``repo`` containing the modified files.
        changes: List of changed file paths; only its truthiness and length
            are used here.
        dry_run: When true, report what would happen and change nothing.

    Returns:
        The created pull request, or ``None`` when there is nothing to do,
        the branch already exists, this is a dry run, or a git / GitHub API
        error occurred (errors are reported on stdout).
    """
    if not changes:
        print(f"No changes needed in {repo.name}")
        return None

    branch_name = 'bulk-find-replace-update'
    default_branch = get_default_branch(repo_path)

    try:
        # Check if branch already exists
        try:
            repo.get_branch(branch_name)
        except GithubException as e:
            # Only "not found" means the branch name is free. Any other API
            # failure (permissions, rate limiting, outage) must not be
            # mistaken for that, so hand it to the outer handler.
            if e.status != 404:
                raise
        else:
            print(f"Branch {branch_name} already exists in {repo.name}, skipping")
            return None

        if dry_run:
            print(f"[DRY RUN] Would create branch and PR for {repo.name} with {len(changes)} changed files")
            return None

        # Ensure we're on the default branch
        subprocess.run(['git', 'checkout', default_branch], cwd=repo_path, check=True)

        # Create and checkout branch
        subprocess.run(['git', 'checkout', '-b', branch_name], cwd=repo_path, check=True)

        # Stage all changes
        subprocess.run(['git', 'add', '-A'], cwd=repo_path, check=True)

        # `git diff --cached --quiet` exits 0 when nothing is staged.
        result = subprocess.run(['git', 'diff', '--cached', '--quiet'], cwd=repo_path)
        if result.returncode == 0:
            print(f"No changes to commit in {repo.name}")
            subprocess.run(['git', 'checkout', default_branch], cwd=repo_path)
            return None

        # Commit changes
        commit_message = """Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io

- Replaced hybrid-cloud-patterns.io with validatedpatterns.io (excluding apiVersion references)
- Updated mailto: references to team-validatedpatterns

This PR was automatically generated by the bulk find and replace workflow."""

        subprocess.run(['git', 'commit', '-m', commit_message], cwd=repo_path, check=True)

        # Push branch
        subprocess.run(['git', 'push', 'origin', branch_name], cwd=repo_path, check=True)

        # Create PR
        pr_title = "Bulk find and replace: hybrid-cloud-patterns.io -> validatedpatterns.io"
        pr_body = f"""This PR updates references from `hybrid-cloud-patterns.io` to `validatedpatterns.io` and updates mailto references.

**Changes:**
- Replaced `hybrid-cloud-patterns.io` with `validatedpatterns.io` (excluding `apiVersion` references)
- Updated `mailto:` references to `team-validatedpatterns`

**Files changed:** {len(changes)}

This PR was automatically generated by the bulk find and replace workflow."""

        pr = repo.create_pull(
            title=pr_title,
            body=pr_body,
            head=branch_name,
            base=default_branch
        )

        print(f"Created PR #{pr.number} in {repo.name}: {pr.html_url}")
        return pr

    except subprocess.CalledProcessError as e:
        print(f"Error creating branch/PR for {repo.name}: {e}")
        return None
    except GithubException as e:
        print(f"GitHub API error for {repo.name}: {e}")
        return None
228+
229+
230+
def main():
    """Run the bulk find-and-replace over an organization's repositories.

    Configuration comes from environment variables:

    * ``GITHUB_TOKEN`` (required): used for the GitHub API and for git
      clone/push over HTTPS.
    * ``ORGANIZATION`` (required): organization whose repositories are
      processed.
    * ``REPOSITORIES`` (optional): comma-separated repository names; empty
      means every repository in the organization.
    * ``DRY_RUN`` (optional): ``true`` (case-insensitive) reports changes
      without writing files, pushing or opening PRs.

    Archived and empty repositories are skipped. Each remaining repository
    is cloned into a private temporary directory, processed, and removed
    again before the next one is handled. A summary is printed at the end.
    Exits with status 1 when a required variable is missing.
    """
    import shutil
    import tempfile

    github_token = os.environ.get('GITHUB_TOKEN')
    organization = os.environ.get('ORGANIZATION')
    repositories_input = os.environ.get('REPOSITORIES', '').strip()
    dry_run = os.environ.get('DRY_RUN', 'false').lower() == 'true'

    if not github_token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)

    if not organization:
        print("Error: ORGANIZATION environment variable is required")
        sys.exit(1)

    g = Github(github_token)
    org = g.get_organization(organization)

    # Parse repository list if provided
    selected_repos = None
    if repositories_input:
        selected_repos = [r.strip() for r in repositories_input.split(',') if r.strip()]
        print(f"Processing selected repositories: {', '.join(selected_repos)}")
    else:
        print(f"Processing all repositories in organization: {organization}")

    print(f"Dry run mode: {dry_run}")

    # A unique private directory avoids clashing with (or deleting) another
    # run's clones under a fixed, shared /tmp path.
    temp_dir = tempfile.mkdtemp(prefix='bulk_find_replace_')

    repos_processed = 0
    repos_with_changes = 0
    prs_created = 0
    repos_skipped = 0
    found_repos = set()

    try:
        for repo in org.get_repos():
            # Filter by selected repositories if provided
            if selected_repos:
                if repo.name not in selected_repos:
                    repos_skipped += 1
                    continue
                found_repos.add(repo.name)

            repos_processed += 1
            print(f"\n[{repos_processed}] Processing {repo.name}...")

            # Skip archived repositories
            if repo.archived:
                print(" Skipping archived repository")
                continue

            # Skip empty repositories
            if repo.size == 0:
                print(" Skipping empty repository")
                continue

            repo_path = None
            try:
                # Git gets no credentials from the API client, so embed the
                # token in the HTTPS remote; otherwise private clones and
                # every `git push` fail to authenticate.
                auth_url = repo.clone_url.replace(
                    'https://', f'https://x-access-token:{github_token}@', 1)
                repo_path = clone_repo(auth_url, temp_dir)

                # Configure git user for commits
                subprocess.run(['git', 'config', 'user.name', 'github-actions[bot]'],
                               cwd=repo_path, check=True)
                subprocess.run(['git', 'config', 'user.email',
                                'github-actions[bot]@users.noreply.github.com'],
                               cwd=repo_path, check=True)

                # Process files
                changes = process_repository(repo_path, dry_run)

                if changes:
                    repos_with_changes += 1
                    print(f" Found {len(changes)} files with changes")

                    # Create branch and PR
                    pr = create_branch_and_pr(repo, repo_path, changes, dry_run)
                    if pr:
                        prs_created += 1
                else:
                    print(" No changes needed")

            except Exception as e:
                # Top-level boundary: one failing repository must not stop
                # the run. Redact the token, which can appear in the failed
                # git command line.
                message = str(e).replace(github_token, '***')
                print(f" Error processing {repo.name}: {message}")
                continue
            finally:
                # Drop each clone right away so disk usage stays bounded
                # when the whole organization is processed.
                if repo_path:
                    shutil.rmtree(repo_path, ignore_errors=True)

    finally:
        # Cleanup
        shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"\n{'='*60}")
    print("Summary:")
    print(f" Repositories processed: {repos_processed}")
    if selected_repos:
        print(f" Repositories skipped (not in selection): {repos_skipped}")
        not_found = set(selected_repos) - found_repos
        if not_found:
            print(f" WARNING: Selected repositories not found: {', '.join(sorted(not_found))}")
    print(f" Repositories with changes: {repos_with_changes}")
    print(f" PRs created: {prs_created}")
    print(f"{'='*60}")
332+
333+
334+
if __name__ == '__main__':
335+
main()
336+

.github/scripts/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
PyGithub>=2.1.1
2+
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Manually triggered workflow that runs the bulk find-and-replace script
# against the repositories of a GitHub organization.
name: Bulk Find and Replace Across Organization

on:
  workflow_dispatch:
    inputs:
      organization:
        description: 'GitHub organization name'
        required: true
        type: string
      repositories:
        description: 'Comma-separated list of repository names to process (leave empty for all repos). Example: repo1,repo2,repo3'
        required: false
        type: string
        default: ''
      dry_run:
        description: 'Dry run mode (no PRs created)'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

jobs:
  find-and-replace:
    runs-on: ubuntu-latest
    # These permissions apply only to the automatic GITHUB_TOKEN, i.e. to
    # the repository that hosts this workflow.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r .github/scripts/requirements.txt

      - name: Run find and replace script
        env:
          # The automatic GITHUB_TOKEN cannot push to or open PRs in other
          # repositories of the organization. Store a PAT or GitHub App
          # token with that access as the ORG_REPO_TOKEN secret; without
          # it the job falls back to GITHUB_TOKEN as before.
          GITHUB_TOKEN: ${{ secrets.ORG_REPO_TOKEN || secrets.GITHUB_TOKEN }}
          ORGANIZATION: ${{ inputs.organization }}
          REPOSITORIES: ${{ inputs.repositories }}
          DRY_RUN: ${{ inputs.dry_run }}
        run: |
          python .github/scripts/bulk_find_replace.py
51+

0 commit comments

Comments
 (0)