Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 53 additions & 56 deletions Sources/ValidatorCore/Commands/CheckDependencies.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,72 +57,69 @@ public struct CheckDependencies: AsyncParsableCommand {
let missing = allDependencies.subtracting(packageList)
print("Not indexed:", missing.count)

let client = HTTPClient(eventLoopGroupProvider: .singleton,
configuration: .init(redirectConfiguration: .disallow))
defer { try? client.syncShutdown() }

var newPackages = UniqueCanonicalPackageURLs()
for (idx, dep) in missing
.sorted(by: { $0.packageURL.absoluteString < $1.packageURL.absoluteString })
.prefix(maxCheck)
.enumerated() {
if idx % 10 == 0 {
print("Progress:", idx, "/", missing.count)
}

// resolve redirects
print("Processing:", dep.packageURL, "...")
guard let resolved = try? await Current.resolvePackageRedirects(client, dep.packageURL).url else {
// TODO: consider adding retry for some errors
print(" ... ⛔ redirect resolution returned nil")
continue
}

if resolved.canonicalPackageURL.canonicalPath != dep.canonicalPath {
print(" ... redirected to:", resolved)
}

if packageList.contains(resolved.canonicalPackageURL) {
print(" ... ⛔ already indexed")
continue
}

do { // run package dump to validate
let repo = try await Current.fetchRepository(client, resolved)
_ = try await Current.decodeManifest(client, repo)
} catch {
print(" ... ⛔ \(error)")
continue
try await HTTPClient.with(configuration: .init(redirectConfiguration: .disallow)) { client in
var newPackages = UniqueCanonicalPackageURLs()
for (idx, dep) in missing
.sorted(by: { $0.packageURL.absoluteString < $1.packageURL.absoluteString })
.prefix(maxCheck)
.enumerated() {
if idx % 10 == 0 {
print("Progress:", idx, "/", missing.count)
}

// resolve redirects
print("Processing:", dep.packageURL, "...")
guard let resolved = try? await Current.resolvePackageRedirects(client, dep.packageURL).url else {
// TODO: consider adding retry for some errors
print(" ... ⛔ redirect resolution returned nil")
continue
}

if resolved.canonicalPackageURL.canonicalPath != dep.canonicalPath {
print(" ... redirected to:", resolved)
}

if packageList.contains(resolved.canonicalPackageURL) {
print(" ... ⛔ already indexed")
continue
}

do { // run package dump to validate
let repo = try await Current.fetchRepository(client, resolved)
_ = try await Current.decodeManifest(client, repo)
} catch {
print(" ... ⛔ \(error)")
continue
}

if newPackages.insert(resolved.appendingGitExtension().canonicalPackageURL).inserted {
print("✅ ADD (\(newPackages.count)):", resolved.appendingGitExtension())
}
if newPackages.count >= limit {
print(" ... limit reached.")
break
}
}

if newPackages.insert(resolved.appendingGitExtension().canonicalPackageURL).inserted {
print("✅ ADD (\(newPackages.count)):", resolved.appendingGitExtension())
print("New packages:", newPackages.count)
for (idx, p) in newPackages
.sorted()
.enumerated() {
print(" ✅ ADD", idx, p)
}
if newPackages.count >= limit {
print(" ... limit reached.")
break
}
}

print("New packages:", newPackages.count)
for (idx, p) in newPackages
.sorted()
.enumerated() {
print(" ✅ ADD", idx, p)
}

// merge with existing and sort result
let merged = (packageList.map(\.packageURL) + newPackages.map(\.packageURL)).sorted()
// merge with existing and sort result
let merged = (packageList.map(\.packageURL) + newPackages.map(\.packageURL)).sorted()

print("Total:", merged.count)
print("Total:", merged.count)

if let path = output {
try Current.fileManager.saveList(merged, path: path)
if let path = output {
try Current.fileManager.saveList(merged, path: path)
}
}
}

public init() { }

}


Expand Down
96 changes: 47 additions & 49 deletions Sources/ValidatorCore/Commands/CheckRedirects.swift
Original file line number Diff line number Diff line change
Expand Up @@ -124,63 +124,61 @@ extension Validator {
let verbose = verbose
let inputURLs = try inputSource.packageURLs()
let prefix = limit ?? inputURLs.count
let httpClient = HTTPClient(eventLoopGroupProvider: .singleton,
configuration: .init(redirectConfiguration: .disallow))
defer { try? httpClient.syncShutdown() }
try await HTTPClient.with(configuration: .init(redirectConfiguration: .disallow)) { httpClient in
let offset = min(offset, inputURLs.count - 1)

let offset = min(offset, inputURLs.count - 1)

print("Checking for redirects (\(prefix) packages) ...")
if let chunk = chunk, let numberOfChunks = numberOfChunks {
print("Chunk \(chunk) of \(numberOfChunks)")
}
print("Checking for redirects (\(prefix) packages) ...")
if let chunk = chunk, let numberOfChunks = numberOfChunks {
print("Chunk \(chunk) of \(numberOfChunks)")
}

Self.normalizedPackageURLs = .init(inputURLs: inputURLs)

let semaphore = Semaphore(maximum: concurrency ?? 1)

let updated = await withTaskGroup(of: PackageURL?.self) { group in
for (index, packageURL) in inputURLs[offset...]
.prefix(prefix)
.chunk(index: chunk, of: numberOfChunks)
.enumerated() {
await semaphore.increment()
try? await semaphore.waitForAvailability()
group.addTask {
do {
let index = index + offset
let redirect = try await resolvePackageRedirects(client: httpClient, for: packageURL)

if index % 100 == 0, let token = Current.githubToken() {
let rateLimit = try await Github.getRateLimit(client: httpClient, token: token).get()
if rateLimit.remaining < 200 {
print("Rate limit remaining: \(rateLimit.remaining)")
print("Sleeping until reset at \(rateLimit.resetDate) ...")
sleep(UInt32(rateLimit.secondsUntilReset + 0.5))
Self.normalizedPackageURLs = .init(inputURLs: inputURLs)

let semaphore = Semaphore(maximum: concurrency ?? 1)

let updated = await withTaskGroup(of: PackageURL?.self) { group in
for (index, packageURL) in inputURLs[offset...]
.prefix(prefix)
.chunk(index: chunk, of: numberOfChunks)
.enumerated() {
await semaphore.increment()
try? await semaphore.waitForAvailability()
group.addTask {
do {
let index = index + offset
let redirect = await resolvePackageRedirects(client: httpClient, for: packageURL)

if index % 100 == 0, let token = Current.githubToken() {
let rateLimit = try await Github.getRateLimit(client: httpClient, token: token).get()
if rateLimit.remaining < 200 {
print("Rate limit remaining: \(rateLimit.remaining)")
print("Sleeping until reset at \(rateLimit.resetDate) ...")
sleep(UInt32(rateLimit.secondsUntilReset + 0.5))
}
}

let res = try await Self.process(redirect: redirect,
verbose: verbose,
index: index,
packageURL: packageURL)

await semaphore.decrement()
return res
} catch {
print("Error in main task group: \(error)")
return nil
}

let res = try await Self.process(redirect: redirect,
verbose: verbose,
index: index,
packageURL: packageURL)

await semaphore.decrement()
return res
} catch {
print("Error in main task group: \(error)")
return nil
}
}
return await group
.compactMap { $0 }
.reduce(into: [], { res, next in res.append(next) })
.sorted(by: { $0.lowercased() < $1.lowercased() })
}
return await group
.compactMap { $0 }
.reduce(into: [], { res, next in res.append(next) })
.sorted(by: { $0.lowercased() < $1.lowercased() })
}

if let path = output {
try Current.fileManager.saveList(updated, path: path)
if let path = output {
try Current.fileManager.saveList(updated, path: path)
}
}
}
}
Expand Down
30 changes: 30 additions & 0 deletions Sources/ValidatorCore/Extensions/HTTPClient+ext.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright Dave Verwer, Sven A. Schmidt, and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import AsyncHTTPClient


extension HTTPClient {
    /// Runs `operation` with a freshly created client and shuts the client down afterwards.
    ///
    /// The client is created with the `.singleton` event loop group provider and the given
    /// `configuration`. `shutdown()` is awaited once `operation` has finished, whether it
    /// returned normally or threw; an error raised by `shutdown()` itself is discarded.
    ///
    /// - Parameters:
    ///   - configuration: Configuration for the temporary client. Defaults to `Configuration()`.
    ///   - operation: The work to perform with the client.
    /// - Throws: Rethrows whatever `operation` throws, after the shutdown attempt.
    static func with(configuration: Configuration = Configuration(),
                     _ operation: (HTTPClient) async throws -> Void) async throws {
        let client = HTTPClient(eventLoopGroupProvider: .singleton, configuration: configuration)

        // `defer` cannot await, so capture the outcome first and shut down on the
        // single code path that follows.
        let outcome: Result<Void, Error>
        do {
            try await operation(client)
            outcome = .success(())
        } catch {
            outcome = .failure(error)
        }

        try? await client.shutdown()
        try outcome.get()
    }
}
2 changes: 1 addition & 1 deletion Sources/ValidatorCore/PackageURL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ extension PackageURL {
}


extension PackageURL: ExpressibleByArgument {
extension PackageURL: @retroactive ExpressibleByArgument {
public init?(argument: String) {
guard let url = URL(string: argument) else { return nil }
self.init(rawValue: url)
Expand Down
30 changes: 17 additions & 13 deletions Sources/ValidatorCore/RedirectFollower.swift
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,17 @@ enum Redirect: Equatable {
}


private func resolveRedirects(client: Client, for url: PackageURL) async throws -> Redirect {
private func resolveRedirects(client: Client, for url: PackageURL) async -> Redirect {
var lastResult = Redirect.initial(url)
var hopCount = 0
let maxHops = 10

func _resolveRedirects(client: Client, for url: PackageURL) async throws -> Redirect {
var request = try HTTPClient.Request(url: url.rawValue, method: .HEAD, headers: .init([
func _resolveRedirects(client: Client, for url: PackageURL) async -> Redirect {
guard var request = try? HTTPClient.Request(url: url.rawValue, method: .HEAD, headers: .init([
("User-Agent", "SPI-Validator")
]))
])) else {
return .error("Failed to create redirect request for \(url.rawValue)")
}
if let token = Current.githubToken() {
request.headers.add(name: "Authorization", value: "Bearer \(token)")
}
Expand All @@ -75,7 +77,7 @@ private func resolveRedirects(client: Client, for url: PackageURL) async throws
}
lastResult = .redirected(to: redirected)
hopCount += 1
return try await _resolveRedirects(client: client, for: redirected)
return await _resolveRedirects(client: client, for: redirected)
case 404:
return .notFound(url)
case 429:
Expand All @@ -85,34 +87,36 @@ private func resolveRedirects(client: Client, for url: PackageURL) async throws
.flatMap(UInt64.init) ?? 60
print("Sleeping for \(delay)s ...")
try await Task.sleep(nanoseconds: NSEC_PER_SEC * delay)
return try await _resolveRedirects(client: client, for: url)
return await _resolveRedirects(client: client, for: url)
case 502: // bad gateway, https://github.com/SwiftPackageIndex/SwiftPackageIndex-Server/issues/3734
// increment hopCount as a way to limit the number of retries (even though it's
// not a true "hop")
hopCount += 1
let delay: UInt64 = 3
print("Sleeping for \(delay)s ...")
try await Task.sleep(nanoseconds: NSEC_PER_SEC * delay)
return try await _resolveRedirects(client: client, for: url)
return await _resolveRedirects(client: client, for: url)
default:
throw AppError.runtimeError("unexpected status '\(response.status.code)' for url: \(url.absoluteString)")
return .error("unexpected status '\(response.status.code)' for url: \(url.absoluteString)")
}
} catch let error as HTTPClientError where error == .remoteConnectionClosed {
hopCount += 1
let delay: UInt64 = 5
print("CONNECTION CLOSED")
print("retrying in \(delay)s ...")
try await Task.sleep(nanoseconds: NSEC_PER_SEC * delay)
return try await _resolveRedirects(client: client, for: url)
try? await Task.sleep(nanoseconds: NSEC_PER_SEC * delay)
return await _resolveRedirects(client: client, for: url)
} catch {
return .error("\(error)")
}
}

return try await _resolveRedirects(client: client, for: url)
return await _resolveRedirects(client: client, for: url)
}


func resolvePackageRedirects(client: Client, for url: PackageURL) async throws -> Redirect {
let res = try await resolveRedirects(client: client, for: url.deletingGitExtension())
func resolvePackageRedirects(client: Client, for url: PackageURL) async -> Redirect {
let res = await resolveRedirects(client: client, for: url.deletingGitExtension())
switch res {
case .initial, .notFound, .error, .unauthorized, .rateLimited:
return res
Expand Down