@@ -81,34 +81,8 @@ def __init__(self, config: DictConfig):
         self.gradient_accumulation_steps = 1  # Example value, adjust as needed
         self._rank = current_rank().rank
         self._size = math.prod(current_size().values())
-        self._init_dist()
         super().__init__(job_config)

-    def _init_dist(self):
-        """Initializes torch distributed.
-
-        torchrun normally hands this, but we need to do it ourselves
-        in monarch for now.
-
-        We should consider putting this into ForgeActor, but having this
-        be explicit for now.
-
-        """
-        env = {
-            "RANK": str(self._rank),
-            "LOCAL_RANK": str(self._rank),
-            "LOCAL_WORLD_SIZE": str(self._size),
-            "GROUP_RANK": str(self._size),
-            "GROUP_WORLD_SIZE": str(self._size),
-            "ROLE_RANK": str(self._rank),
-            "ROLE_WORLD_SIZE": str(self._size),
-            "ROLE_NAME": "rank",
-            "WORLD_SIZE": str(self._size),
-            "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
-        }
-        os.environ.update(env)
-        logger.info("env: {}".format(env))
-
     async def setup_metric_logger(self):
         """Initialization happens in the main process. Here we just retrieve it"""
         mlogger = await get_or_create_metric_logger()
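
For context on what the removed helper was doing: the variables it exported follow torchrun's standard environment contract, which torch.distributed consumes when a process group is initialized with the default env:// init method. Below is a minimal, self-contained sketch of that mechanism; the master address/port and the single-process values are illustrative assumptions and are not part of this change.

import os

import torch.distributed as dist

# Illustrative values for a single-process run; under torchrun the launcher
# sets these, and under monarch the removed _init_dist() populated them.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")

# With the default env:// init method, torch.distributed reads RANK and
# WORLD_SIZE from the environment, which is why populating os.environ
# before super().__init__(job_config) ran was sufficient.
dist.init_process_group(backend="gloo")
print(f"rank {dist.get_rank()} of {dist.get_world_size()}")
dist.destroy_process_group()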