99from typing import Pattern
1010
1111import pykube
12- from pykube import CronJob
12+ from pykube import CronJob , HTTPClient
1313from pykube import Deployment
1414from pykube import HorizontalPodAutoscaler
1515from pykube import Namespace
1616from pykube import StatefulSet
1717from pykube import Job
1818from pykube import CustomResourceDefinition
19+ from pykube .exceptions import HTTPError
1920from pykube .objects import NamespacedAPIObject , APIObject
2021from pykube import DaemonSet
2122from pykube .objects import NamespacedAPIObject , PodDisruptionBudget
@@ -299,6 +300,28 @@ def get_resources(kind, api, namespaces: FrozenSet[str], excluded_namespaces):
299300 return resources , excluded_namespaces ;
300301
301302
303+ def get_resource (kind , api , namespace , resource_name : str ):
304+
305+ try :
306+ resource = kind .objects (api ).filter (namespace = namespace ).get_or_none (name = resource_name )
307+ if resource is None :
308+ logger .debug (f"{ kind .endpoint } { namespace } /{ resource_name } not found" )
309+ except requests .HTTPError as e :
310+ resource = None
311+ if e .response .status_code == 404 :
312+ logger .debug (
313+ f"{ kind } { resource_name } not found in namespace { namespace } (404)"
314+ )
315+ if e .response .status_code == 403 :
316+ logger .warning (
317+ f"KubeDownscaler is not authorized to to retrieve { kind } { namespace } /{ resource_name } (403)"
318+ )
319+ else :
320+ raise e
321+
322+ return resource
323+
324+
302325def scale_jobs_without_admission_controller (plural , admission_controller , constrainted_downscaler ):
303326 return (plural == "jobs" and admission_controller == "" ) or constrainted_downscaler
304327
@@ -864,6 +887,9 @@ def autoscale_resource(
864887 forced_uptime : bool ,
865888 forced_downtime : bool ,
866889 upscale_target_only : bool ,
890+ max_retries_on_conflict : int ,
891+ api : HTTPClient ,
892+ kind : NamespacedAPIObject ,
867893 dry_run : bool ,
868894 now : datetime .datetime ,
869895 grace_period : int = 0 ,
@@ -989,14 +1015,55 @@ def autoscale_resource(
9891015 else :
9901016 resource .update ()
9911017 except Exception as e :
992- logger .exception (
993- f"Failed to process { resource .kind } { resource .namespace } /{ resource .name } : { e } "
994- )
1018+ if isinstance (e , HTTPError ) and "the object has been modified" in str (e ).lower ():
1019+ logger .warning (
1020+ f"Unable to process { resource .kind } { resource .namespace } /{ resource .name } because it was recently modified"
1021+ )
1022+ if max_retries_on_conflict > 0 :
1023+ logger .info (
1024+ f"Retrying processing { resource .kind } { resource .namespace } /{ resource .name } (Remaining Retries: { max_retries_on_conflict } )" )
1025+ max_retries_on_conflict = max_retries_on_conflict - 1
1026+ refreshed_resource = get_resource (kind , api , resource .namespace , resource .name )
1027+ if refreshed_resource is not None :
1028+ autoscale_resource (
1029+ refreshed_resource ,
1030+ upscale_period ,
1031+ downscale_period ,
1032+ default_uptime ,
1033+ default_downtime ,
1034+ forced_uptime ,
1035+ forced_downtime ,
1036+ upscale_target_only ,
1037+ max_retries_on_conflict ,
1038+ api ,
1039+ kind ,
1040+ dry_run ,
1041+ now ,
1042+ grace_period ,
1043+ downtime_replicas ,
1044+ namespace_excluded = namespace_excluded ,
1045+ deployment_time_annotation = deployment_time_annotation ,
1046+ enable_events = enable_events ,
1047+ matching_labels = matching_labels ,
1048+ )
1049+ else :
1050+ logger .warning (
1051+ f"Retry process failed for { resource .kind } { resource .namespace } /{ resource .name } because the resource cannot be found, it may have been deleted from the cluster" )
1052+ else :
1053+ logger .warning (
1054+ f"Will retry processing { resource .kind } { resource .namespace } /{ resource .name } in the next iteration, unless the --once argument is specified"
1055+ )
1056+ elif isinstance (e , HTTPError ) and "not found" in str (e ).lower ():
1057+ logger .info (f"While waiting to process { resource .kind } { resource .namespace } /{ resource .name } , the resource was removed from the cluster" )
1058+ else :
1059+ logger .exception (
1060+ f"Failed to process { resource .kind } { resource .namespace } /{ resource .name } : { e } "
1061+ )
9951062
9961063
9971064def autoscale_resources (
998- api ,
999- kind ,
1065+ api : HTTPClient ,
1066+ kind : NamespacedAPIObject ,
10001067 namespace : FrozenSet [Pattern ],
10011068 exclude_namespaces : FrozenSet [Pattern ],
10021069 exclude_names : FrozenSet [str ],
@@ -1008,6 +1075,7 @@ def autoscale_resources(
10081075 forced_uptime : bool ,
10091076 upscale_target_only : bool ,
10101077 constrained_downscaler : bool ,
1078+ max_retries_on_conflict : int ,
10111079 dry_run : bool ,
10121080 now : datetime .datetime ,
10131081 grace_period : int ,
@@ -1115,6 +1183,9 @@ def autoscale_resources(
11151183 forced_uptime_for_namespace ,
11161184 forced_downtime_for_namespace ,
11171185 upscale_target_only ,
1186+ max_retries_on_conflict ,
1187+ api ,
1188+ kind ,
11181189 dry_run ,
11191190 now ,
11201191 grace_period ,
@@ -1351,6 +1422,7 @@ def scale(
13511422 admission_controller : str ,
13521423 constrained_downscaler : bool ,
13531424 api_server_timeout : int ,
1425+ max_retries_on_conflict : int ,
13541426 downtime_replicas : int = 0 ,
13551427 deployment_time_annotation : Optional [str ] = None ,
13561428 enable_events : bool = False ,
@@ -1379,6 +1451,7 @@ def scale(
13791451 forced_uptime ,
13801452 upscale_target_only ,
13811453 constrained_downscaler ,
1454+ max_retries_on_conflict ,
13821455 dry_run ,
13831456 now ,
13841457 grace_period ,
0 commit comments