From 5d33a941713bf4f8988595ea4667d4697e237578 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:13:51 +0000 Subject: [PATCH 01/12] Change: Conclude weight_factor use in train_chunk --- trainer/train.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index f01b0ec..a1742c8 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -54,7 +54,7 @@ def step(weights): frame = motion.frame_at(start + scene.ts - start_ts) frame.positions = apply_weights( - frame.positions, (init_weight + frame_weight) * weight_factor) + frame.positions, init_weight + frame_weight * weight_factor) apply_joints(robot, frame.positions) scene.step() @@ -75,10 +75,10 @@ def step(weights): reward = step(weights) state = StateWithJoints.save(scene, robot) - return reward, weights, state + return reward, weights * weight_factor, state -def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.01, **kwargs): +def train(scene, motion, robot, chunk_length=3, num_chunk=100, **kwargs): chunk_duration = scene.dt * chunk_length total_length = chunk_duration * num_chunk log.info(f"chunk duration: {chunk_duration} s") @@ -102,8 +102,7 @@ def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.0 r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk( - scene, motion, robot, start, in_weights, last_state, weight_factor=weight_factor, **kwargs) + reward, out_weights, last_state = train_chunk(scene, motion, robot, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w @@ -119,6 +118,6 @@ def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.0 for i, frame_weight in enumerate(weights): t = i * scene.dt new_frame = motion.frame_at(t) - new_frame.positions = 
apply_weights(new_frame.positions, frame_weight * weight_factor) + new_frame.positions = apply_weights(new_frame.positions, frame_weight) new_motion.insert_keyframe(t, new_frame) return new_motion From ec32a2dbd85f6ff1114fcd64c37746454bde95d6 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:18:25 +0000 Subject: [PATCH 02/12] Add: Compute appropriate number of chunks when num_chunk is None --- trainer/cli.py | 5 ++--- trainer/train.py | 9 +++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index b9f05fc..326a91e 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -59,9 +59,8 @@ def __post_init__(self, motion, log_level, log_file): def _load_robot(self): self._robot = simulation.reset(self._scene, self.robot) - def train(self, output, chunk_length=3, num_chunk=50, **kwargs): - trained = trainer.train(self._scene, self._motion, self._robot, - chunk_length, num_chunk, **kwargs) + def train(self, output, **kwargs): + trained = trainer.train(self._scene, self._motion, self._robot, **kwargs) trained.dump(output) def preview(self): diff --git a/trainer/train.py b/trainer/train.py index a1742c8..1f4e51b 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,7 +1,8 @@ import numpy as np -from typing import Dict +from typing import Dict, Optional import dataclasses from logging import getLogger +import math from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -78,8 +79,12 @@ def step(weights): return reward, weights * weight_factor, state -def train(scene, motion, robot, chunk_length=3, num_chunk=100, **kwargs): +def train(scene: Scene, motion: flom.Motion, robot: Robot, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length + + if num_chunk is None: + num_chunk = math.ceil(motion.length() / chunk_duration) + total_length = chunk_duration * num_chunk log.info(f"chunk duration: {chunk_duration} 
s") log.info(f"motion length: {motion.length()} s") From 91d1be7d538696c05389d352e62e484ece46d3d1 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:21:07 +0000 Subject: [PATCH 03/12] Change: Make some parameters keyword-only to avoid confusion --- trainer/evaluation.py | 10 +++++----- trainer/train.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/trainer/evaluation.py b/trainer/evaluation.py index 9714340..960ab7d 100644 --- a/trainer/evaluation.py +++ b/trainer/evaluation.py @@ -7,7 +7,7 @@ from .simulation import apply_joints -def calc_effector_reward(motion, robot, frame, ke, wl, wr): +def calc_effector_reward(motion, robot, frame, *, ke, wl, wr): diff = 0 for name, effector in frame.effectors.items(): pose = robot.link_state(name).pose @@ -26,7 +26,7 @@ def calc_effector_reward(motion, robot, frame, ke, wl, wr): return - math.exp(normalized) + 1 -def calc_stabilization_reward(frame, pre_positions, ks): +def calc_stabilization_reward(frame, pre_positions, *, ks): if pre_positions is None: return 0 @@ -35,11 +35,11 @@ def calc_stabilization_reward(frame, pre_positions, ks): return - math.exp(normalized) + 1 -def calc_reward(motion, robot, frame, pre_positions, we=1, ws=0.1, ke=1, ks=1, wl=1, wr=0.005): +def calc_reward(motion, robot, frame, pre_positions, *, we=1, ws=0.1, ke=1, ks=1, wl=1, wr=0.005): # TODO: Use more clear naming of hyperparameters - e = calc_effector_reward(motion, robot, frame, ke, wl, wr) - s = calc_stabilization_reward(frame, pre_positions, ks) + e = calc_effector_reward(motion, robot, frame, ke=ke, wl=wl, wr=wr) + s = calc_stabilization_reward(frame, pre_positions, ks=ks) return e * we + s * ws diff --git a/trainer/train.py b/trainer/train.py index 1f4e51b..11a16bc 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -40,7 +40,7 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: 
float, init_weights: np.ndarray, init_state: StateWithJoints, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape def step(weights): @@ -79,7 +79,7 @@ def step(weights): return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, robot: Robot, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: From 2607524a525bb2bc05095ae39e939d13aabc67de Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:15:37 +0000 Subject: [PATCH 04/12] [WIP] Add: use many workers --- trainer/train.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 11a16bc..32abdb3 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -3,6 +3,7 @@ import dataclasses from logging import getLogger import math +from concurrent import futures from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -69,8 +70,9 @@ def step(weights): weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=1) - recommendation = optimizer.optimize(inst_step) + dimension=inst_step.dimension, budget=num_iteration, num_workers=5) + with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers) as executor: + recommendation = 
optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) reward = step(weights) From b3cb9d1542f785fe6f584a42569ae39d8474fd3b Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:21:07 +0000 Subject: [PATCH 05/12] [WIP] Add: Take list of robots and operate them separately --- trainer/train.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 32abdb3..16f1181 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,5 +1,5 @@ import numpy as np -from typing import Dict, Optional +from typing import Dict, Optional, List import dataclasses from logging import getLogger import math @@ -41,10 +41,11 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(scene: Scene, motion: flom.Motion, robots: List[Robot], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape def step(weights): + robot = threading.local().robot init_state.restore(scene, robot) reward_sum = 0 @@ -67,11 +68,16 @@ def step(weights): return -reward_sum + def register_thread(): + robot = next(r for r in robots if r not in thread_robots.values()) + threading.local().robot = robot + weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=5) - with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers) as executor: + 
dimension=inst_step.dimension, budget=num_iteration, num_workers=len(robots)) + + with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) From 526a74680a8680438a179d1d973ec1fd5c142fb8 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:23:51 +0000 Subject: [PATCH 06/12] [WIP] Add: Take make_robot and initialize robots using them --- trainer/train.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 16f1181..0761fbb 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,5 +1,5 @@ import numpy as np -from typing import Dict, Optional, List +from typing import Dict, Optional, List, Callable import dataclasses from logging import getLogger import math @@ -87,7 +87,7 @@ def register_thread(): return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: @@ -107,6 +107,8 @@ def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") + robots = [make_robot(i) for i in range(num_workers)] + last_state = StateWithJoints.save(scene, robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration @@ -115,7 +117,7 @@ def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, 
last_state = train_chunk(scene, motion, robot, start, in_weights, last_state, **kwargs) + reward, out_weights, last_state = train_chunk(scene, motion, robots, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From d4dfe7118a0ad7477eaed2e57bd8ed5630de7bc7 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:31:56 +0000 Subject: [PATCH 07/12] [WIP] Fix: Treat scene and robot as a pair --- trainer/train.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 0761fbb..adf80aa 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -41,11 +41,13 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robots: List[Robot], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape - def step(weights): - robot = threading.local().robot + def step(weights, scene_robot: Tuple[Scene, Robot] = None): + scene_robot = scene_robot or threading.local().scene_robot + + scene, robot = scene_robot init_state.restore(scene, robot) reward_sum = 0 @@ -69,8 +71,8 @@ def step(weights): return -reward_sum def register_thread(): - robot = next(r for r in robots if r not in thread_robots.values()) - threading.local().robot = robot + scene_robot = next(r for r in scenes_robots if r not in thread_robots.values()) + threading.local().scene_robot = scene_robot weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) 
inst_step = InstrumentedFunction(step, weights_param) @@ -81,13 +83,14 @@ def register_thread(): recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) - reward = step(weights) + scene, robot = scenes_robots[0] + reward = step(weights, (scene, robot)) state = StateWithJoints.save(scene, robot) return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: @@ -107,7 +110,7 @@ def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - robots = [make_robot(i) for i in range(num_workers)] + scenes_robots = [make_scene(i) for i in range(num_workers)] last_state = StateWithJoints.save(scene, robot) for chunk_idx in range(num_chunk): @@ -117,7 +120,7 @@ def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk(scene, motion, robots, start, in_weights, last_state, **kwargs) + reward, out_weights, last_state = train_chunk(motion, scenes_robots, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From 7d6d65a7c1074252dec424c2478f4b01d6102f89 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:49:46 +0000 Subject: [PATCH 08/12] [WIP] Fix: Fix some basic mistakes --- trainer/cli.py | 7 ++++++- trainer/train.py | 28 ++++++++++++++++++---------- 2 
files changed, 24 insertions(+), 11 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index 326a91e..0a67c5e 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -60,7 +60,12 @@ def _load_robot(self): self._robot = simulation.reset(self._scene, self.robot) def train(self, output, **kwargs): - trained = trainer.train(self._scene, self._motion, self._robot, **kwargs) + def make_scene(_): + gui_client = BulletClient(connection_mode=pybullet.DIRECT) + scene = Scene(self.timestep, self.frame_skip, client=gui_client) + robot = simulation.reset(self._scene, self.robot) + return scene, robot + trained = trainer.train(self._motion, make_scene, **kwargs) trained.dump(output) def preview(self): diff --git a/trainer/train.py b/trainer/train.py index adf80aa..72aaaba 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,9 +1,10 @@ import numpy as np -from typing import Dict, Optional, List, Callable +from typing import Dict, Optional, List, Callable, Tuple import dataclasses from logging import getLogger import math from concurrent import futures +import threading from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -44,8 +45,10 @@ def save(scene: Scene, robot: Robot): def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape + thread_scene_robot = {} # type: Dict[int, Tuple[Scene, Robot]] + def step(weights, scene_robot: Tuple[Scene, Robot] = None): - scene_robot = scene_robot or threading.local().scene_robot + scene_robot = scene_robot or thread_scene_robot[threading.get_ident()] scene, robot = scene_robot init_state.restore(scene, robot) @@ -70,14 +73,18 @@ def step(weights, scene_robot: Tuple[Scene, Robot] = None): return -reward_sum + used_idx = 0 + lock = 
threading.Lock() def register_thread(): - scene_robot = next(r for r in scenes_robots if r not in thread_robots.values()) - threading.local().scene_robot = scene_robot + nonlocal used_idx, thread_scene_robot + with lock: + thread_scene_robot[threading.get_ident()] = scenes_robots[used_idx] + used_idx += 1 weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=len(robots)) + dimension=inst_step.dimension, budget=num_iteration, num_workers=len(scenes_robots)) with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) @@ -91,7 +98,10 @@ def register_thread(): def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): - chunk_duration = scene.dt * chunk_length + scenes_robots = [make_scene(i) for i in range(num_workers)] + first_scene, first_robot = scenes_robots[0] + + chunk_duration = first_scene.dt * chunk_length if num_chunk is None: num_chunk = math.ceil(motion.length() / chunk_duration) @@ -104,15 +114,13 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], if total_length < motion.length(): log.warning(f"A total length to train is shorter than the length of motion") - num_frames = int(motion.length() / scene.dt) + num_frames = int(motion.length() / first_scene.dt) num_joints = len(list(motion.joint_names())) # TODO: Call len() directly weights = np.zeros(shape=(num_frames, num_joints)) log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - scenes_robots = [make_scene(i) for i in range(num_workers)] - - last_state = StateWithJoints.save(scene, robot) + last_state = StateWithJoints.save(first_scene, 
first_robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration start_idx = chunk_idx * chunk_length % num_frames From 6f6e5d3bf38a33655de4faacee543be65e8f2acf Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:52:40 +0000 Subject: [PATCH 09/12] Add: Add Env class to use scene, robot, state as one object --- trainer/train.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/trainer/train.py b/trainer/train.py index 72aaaba..b6e3ac6 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -42,6 +42,20 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) +@dataclasses.dataclass +class Env: + scene: Scene + robot: Robot + + state: Optional[StateWithJoints] = None + + def save(self): + self.state = StateWithJoints.save(self.scene, self.robot) + + def restore(self): + self.state.restore(self.scene, self.robot) + + def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape From 7c00475df72211d5c7591c3a0888b1c3b186a955 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:59:00 +0000 Subject: [PATCH 10/12] Change: Save on construction --- trainer/train.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/trainer/train.py b/trainer/train.py index b6e3ac6..e2be96d 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -49,6 +49,9 @@ class Env: state: Optional[StateWithJoints] = None + def __post_init__(self): + self.save() + def save(self): self.state = StateWithJoints.save(self.scene, self.robot) From 37c850d1a0fbf7eadf3dbf861896d7715250a40f Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:59:19 +0000 Subject: [PATCH 11/12] Change: Entirely rewrite with Env --- trainer/train.py | 48 
+++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index e2be96d..f8a3f38 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -59,32 +59,31 @@ def restore(self): self.state.restore(self.scene, self.robot) -def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(motion: flom.Motion, envs: List[Env], start: float, init_weights: np.ndarray, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape - thread_scene_robot = {} # type: Dict[int, Tuple[Scene, Robot]] + thread_envs = {} # type: Dict[int, Env] - def step(weights, scene_robot: Tuple[Scene, Robot] = None): - scene_robot = scene_robot or thread_scene_robot[threading.get_ident()] + def step(weights, env: Env = None): + env = env or thread_envs[threading.get_ident()] - scene, robot = scene_robot - init_state.restore(scene, robot) + env.restore() reward_sum = 0 - start_ts = scene.ts + start_ts = env.scene.ts - pre_positions = try_get_pre_positions(scene, motion, start=start) + pre_positions = try_get_pre_positions(env.scene, motion, start=start) for init_weight, frame_weight in zip(init_weights, weights): - frame = motion.frame_at(start + scene.ts - start_ts) + frame = motion.frame_at(start + env.scene.ts - start_ts) frame.positions = apply_weights( frame.positions, init_weight + frame_weight * weight_factor) - apply_joints(robot, frame.positions) + apply_joints(env.robot, frame.positions) - scene.step() + env.scene.step() - reward_sum += calc_reward(motion, robot, frame, pre_positions, **kwargs) + reward_sum += calc_reward(motion, env.robot, frame, pre_positions, **kwargs) pre_positions 
= frame.positions @@ -93,32 +92,32 @@ def step(weights, scene_robot: Tuple[Scene, Robot] = None): used_idx = 0 lock = threading.Lock() def register_thread(): - nonlocal used_idx, thread_scene_robot + nonlocal used_idx, thread_envs with lock: - thread_scene_robot[threading.get_ident()] = scenes_robots[used_idx] + thread_envs[threading.get_ident()] = envs[used_idx] used_idx += 1 weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=len(scenes_robots)) + dimension=inst_step.dimension, budget=num_iteration, num_workers=len(envs)) with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) - scene, robot = scenes_robots[0] - reward = step(weights, (scene, robot)) + for e in envs: + reward = step(weights, e) + e.save() - state = StateWithJoints.save(scene, robot) - return reward, weights * weight_factor, state + return reward, weights * weight_factor def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): - scenes_robots = [make_scene(i) for i in range(num_workers)] - first_scene, first_robot = scenes_robots[0] + envs = [Env(*make_scene(i)) for i in range(num_workers)] + first_env = envs[0] - chunk_duration = first_scene.dt * chunk_length + chunk_duration = first_env.scene.dt * chunk_length if num_chunk is None: num_chunk = math.ceil(motion.length() / chunk_duration) @@ -131,13 +130,12 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], if total_length < motion.length(): log.warning(f"A total length to train is shorter than the length of motion") - num_frames = int(motion.length() / 
first_scene.dt) + num_frames = int(motion.length() / first_env.scene.dt) num_joints = len(list(motion.joint_names())) # TODO: Call len() directly weights = np.zeros(shape=(num_frames, num_joints)) log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - last_state = StateWithJoints.save(first_scene, first_robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration start_idx = chunk_idx * chunk_length % num_frames @@ -145,7 +143,7 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk(motion, scenes_robots, start, in_weights, last_state, **kwargs) + reward, out_weights = train_chunk(motion, envs, start, in_weights, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From f29932e7e38c922fc46108fdf66bc52a35cea5cd Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 10:08:46 +0000 Subject: [PATCH 12/12] Fix: Fix typo --- trainer/cli.py | 2 +- trainer/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index 0a67c5e..2d6afc7 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -63,7 +63,7 @@ def train(self, output, **kwargs): def make_scene(_): gui_client = BulletClient(connection_mode=pybullet.DIRECT) scene = Scene(self.timestep, self.frame_skip, client=gui_client) - robot = simulation.reset(self._scene, self.robot) + robot = simulation.reset(scene, self.robot) return scene, robot trained = trainer.train(self._motion, make_scene, **kwargs) trained.dump(output) diff --git a/trainer/train.py b/trainer/train.py index f8a3f38..9f42b78 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -157,7 +157,7 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], new_motion.set_effector_weight(name, 
motion.effector_weight(name)) for i, frame_weight in enumerate(weights): - t = i * scene.dt + t = i * first_env.scene.dt new_frame = motion.frame_at(t) new_frame.positions = apply_weights(new_frame.positions, frame_weight) new_motion.insert_keyframe(t, new_frame)