From 5d33a941713bf4f8988595ea4667d4697e237578 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:13:51 +0000 Subject: [PATCH 01/12] Change: Conclude weight_factor use in train_chunk --- trainer/train.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index f01b0ec..a1742c8 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -54,7 +54,7 @@ def step(weights): frame = motion.frame_at(start + scene.ts - start_ts) frame.positions = apply_weights( - frame.positions, (init_weight + frame_weight) * weight_factor) + frame.positions, init_weight + frame_weight * weight_factor) apply_joints(robot, frame.positions) scene.step() @@ -75,10 +75,10 @@ def step(weights): reward = step(weights) state = StateWithJoints.save(scene, robot) - return reward, weights, state + return reward, weights * weight_factor, state -def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.01, **kwargs): +def train(scene, motion, robot, chunk_length=3, num_chunk=100, **kwargs): chunk_duration = scene.dt * chunk_length total_length = chunk_duration * num_chunk log.info(f"chunk duration: {chunk_duration} s") @@ -102,8 +102,7 @@ def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.0 r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk( - scene, motion, robot, start, in_weights, last_state, weight_factor=weight_factor, **kwargs) + reward, out_weights, last_state = train_chunk(scene, motion, robot, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w @@ -119,6 +118,6 @@ def train(scene, motion, robot, chunk_length=3, num_chunk=100, weight_factor=0.0 for i, frame_weight in enumerate(weights): t = i * scene.dt new_frame = motion.frame_at(t) - new_frame.positions = 
apply_weights(new_frame.positions, frame_weight * weight_factor) + new_frame.positions = apply_weights(new_frame.positions, frame_weight) new_motion.insert_keyframe(t, new_frame) return new_motion From ec32a2dbd85f6ff1114fcd64c37746454bde95d6 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:18:25 +0000 Subject: [PATCH 02/12] Add: Compute appropriate number of chunks when num_chunk is None --- trainer/cli.py | 5 ++--- trainer/train.py | 9 +++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index b9f05fc..326a91e 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -59,9 +59,8 @@ def __post_init__(self, motion, log_level, log_file): def _load_robot(self): self._robot = simulation.reset(self._scene, self.robot) - def train(self, output, chunk_length=3, num_chunk=50, **kwargs): - trained = trainer.train(self._scene, self._motion, self._robot, - chunk_length, num_chunk, **kwargs) + def train(self, output, **kwargs): + trained = trainer.train(self._scene, self._motion, self._robot, **kwargs) trained.dump(output) def preview(self): diff --git a/trainer/train.py b/trainer/train.py index a1742c8..1f4e51b 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,7 +1,8 @@ import numpy as np -from typing import Dict +from typing import Dict, Optional import dataclasses from logging import getLogger +import math from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -78,8 +79,12 @@ def step(weights): return reward, weights * weight_factor, state -def train(scene, motion, robot, chunk_length=3, num_chunk=100, **kwargs): +def train(scene: Scene, motion: flom.Motion, robot: Robot, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length + + if num_chunk is None: + num_chunk = math.ceil(motion.length() / chunk_duration) + total_length = chunk_duration * num_chunk log.info(f"chunk duration: {chunk_duration} 
s") log.info(f"motion length: {motion.length()} s") From 91d1be7d538696c05389d352e62e484ece46d3d1 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 08:21:07 +0000 Subject: [PATCH 03/12] Change: Make some parameters keyword-only to avoid confusion --- trainer/evaluation.py | 10 +++++----- trainer/train.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/trainer/evaluation.py b/trainer/evaluation.py index 9714340..960ab7d 100644 --- a/trainer/evaluation.py +++ b/trainer/evaluation.py @@ -7,7 +7,7 @@ from .simulation import apply_joints -def calc_effector_reward(motion, robot, frame, ke, wl, wr): +def calc_effector_reward(motion, robot, frame, *, ke, wl, wr): diff = 0 for name, effector in frame.effectors.items(): pose = robot.link_state(name).pose @@ -26,7 +26,7 @@ def calc_effector_reward(motion, robot, frame, ke, wl, wr): return - math.exp(normalized) + 1 -def calc_stabilization_reward(frame, pre_positions, ks): +def calc_stabilization_reward(frame, pre_positions, *, ks): if pre_positions is None: return 0 @@ -35,11 +35,11 @@ def calc_stabilization_reward(frame, pre_positions, ks): return - math.exp(normalized) + 1 -def calc_reward(motion, robot, frame, pre_positions, we=1, ws=0.1, ke=1, ks=1, wl=1, wr=0.005): +def calc_reward(motion, robot, frame, pre_positions, *, we=1, ws=0.1, ke=1, ks=1, wl=1, wr=0.005): # TODO: Use more clear naming of hyperparameters - e = calc_effector_reward(motion, robot, frame, ke, wl, wr) - s = calc_stabilization_reward(frame, pre_positions, ks) + e = calc_effector_reward(motion, robot, frame, ke=ke, wl=wl, wr=wr) + s = calc_stabilization_reward(frame, pre_positions, ks=ks) return e * we + s * ws diff --git a/trainer/train.py b/trainer/train.py index 1f4e51b..11a16bc 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -40,7 +40,7 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: 
float, init_weights: np.ndarray, init_state: StateWithJoints, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape def step(weights): @@ -79,7 +79,7 @@ def step(weights): return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, robot: Robot, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: From 2607524a525bb2bc05095ae39e939d13aabc67de Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:15:37 +0000 Subject: [PATCH 04/12] [WIP] Add: use many workers --- trainer/train.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 11a16bc..32abdb3 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -3,6 +3,7 @@ import dataclasses from logging import getLogger import math +from concurrent import futures from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -69,8 +70,9 @@ def step(weights): weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=1) - recommendation = optimizer.optimize(inst_step) + dimension=inst_step.dimension, budget=num_iteration, num_workers=5) + with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers) as executor: + recommendation = 
optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) reward = step(weights) From b3cb9d1542f785fe6f584a42569ae39d8474fd3b Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:21:07 +0000 Subject: [PATCH 05/12] [WIP] Add: Take list of robots and operate them separately --- trainer/train.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 32abdb3..16f1181 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,5 +1,5 @@ import numpy as np -from typing import Dict, Optional +from typing import Dict, Optional, List import dataclasses from logging import getLogger import math @@ -41,10 +41,11 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robot: Robot, start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(scene: Scene, motion: flom.Motion, robots: List[Robot], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape def step(weights): + robot = threading.local().robot init_state.restore(scene, robot) reward_sum = 0 @@ -67,11 +68,16 @@ def step(weights): return -reward_sum + def register_thread(): + robot = next(r for r in robots if r not in thread_robots.values()) + threading.local().robot = robot + weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=5) - with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers) as executor: + 
dimension=inst_step.dimension, budget=num_iteration, num_workers=len(robots)) + + with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) From 526a74680a8680438a179d1d973ec1fd5c142fb8 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:23:51 +0000 Subject: [PATCH 06/12] [WIP] Add: Take make_robot and initialize robots using them --- trainer/train.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 16f1181..0761fbb 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,5 +1,5 @@ import numpy as np -from typing import Dict, Optional, List +from typing import Dict, Optional, List, Callable import dataclasses from logging import getLogger import math @@ -87,7 +87,7 @@ def register_thread(): return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: @@ -107,6 +107,8 @@ def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") + robots = [make_robot(i) for i in range(num_workers)] + last_state = StateWithJoints.save(scene, robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration @@ -115,7 +117,7 @@ def train(scene: Scene, motion: flom.Motion, robot: Robot, *, chunk_length: int r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, 
last_state = train_chunk(scene, motion, robot, start, in_weights, last_state, **kwargs) + reward, out_weights, last_state = train_chunk(scene, motion, robots, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From d4dfe7118a0ad7477eaed2e57bd8ed5630de7bc7 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:31:56 +0000 Subject: [PATCH 07/12] [WIP] Fix: Treat scene and robot as a pair --- trainer/train.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index 0761fbb..adf80aa 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -41,11 +41,13 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) -def train_chunk(scene: Scene, motion: flom.Motion, robots: List[Robot], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape - def step(weights): - robot = threading.local().robot + def step(weights, scene_robot: Tuple[Scene, Robot] = None): + scene_robot = scene_robot or threading.local().scene_robot + + scene, robot = scene_robot init_state.restore(scene, robot) reward_sum = 0 @@ -69,8 +71,8 @@ def step(weights): return -reward_sum def register_thread(): - robot = next(r for r in robots if r not in thread_robots.values()) - threading.local().robot = robot + scene_robot = next(r for r in scenes_robots if r not in thread_robots.values()) + threading.local().scene_robot = scene_robot weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) 
inst_step = InstrumentedFunction(step, weights_param) @@ -81,13 +83,14 @@ def register_thread(): recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) - reward = step(weights) + scene, robot = scenes_robots[0] + reward = step(weights, (scene, robot)) state = StateWithJoints.save(scene, robot) return reward, weights * weight_factor, state -def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): +def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): chunk_duration = scene.dt * chunk_length if num_chunk is None: @@ -107,7 +110,7 @@ def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - robots = [make_robot(i) for i in range(num_workers)] + scenes_robots = [make_scene(i) for i in range(num_workers)] last_state = StateWithJoints.save(scene, robot) for chunk_idx in range(num_chunk): @@ -117,7 +120,7 @@ def train(scene: Scene, motion: flom.Motion, make_robot: Callable[[int], Robot], r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk(scene, motion, robots, start, in_weights, last_state, **kwargs) + reward, out_weights, last_state = train_chunk(motion, scenes_robots, start, in_weights, last_state, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From 7d6d65a7c1074252dec424c2478f4b01d6102f89 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:49:46 +0000 Subject: [PATCH 08/12] [WIP] Fix: Fix some basic mistakes --- trainer/cli.py | 7 ++++++- trainer/train.py | 28 ++++++++++++++++++---------- 2 
files changed, 24 insertions(+), 11 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index 326a91e..0a67c5e 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -60,7 +60,12 @@ def _load_robot(self): self._robot = simulation.reset(self._scene, self.robot) def train(self, output, **kwargs): - trained = trainer.train(self._scene, self._motion, self._robot, **kwargs) + def make_scene(_): + gui_client = BulletClient(connection_mode=pybullet.DIRECT) + scene = Scene(self.timestep, self.frame_skip, client=gui_client) + robot = simulation.reset(self._scene, self.robot) + return scene, robot + trained = trainer.train(self._motion, make_scene, **kwargs) trained.dump(output) def preview(self): diff --git a/trainer/train.py b/trainer/train.py index adf80aa..72aaaba 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -1,9 +1,10 @@ import numpy as np -from typing import Dict, Optional, List, Callable +from typing import Dict, Optional, List, Callable, Tuple import dataclasses from logging import getLogger import math from concurrent import futures +import threading from nevergrad.optimization import optimizerlib from nevergrad.instrumentation import InstrumentedFunction @@ -44,8 +45,10 @@ def save(scene: Scene, robot: Robot): def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape + thread_scene_robot = {} # type: Dict[int, Tuple[Scene, Robot]] + def step(weights, scene_robot: Tuple[Scene, Robot] = None): - scene_robot = scene_robot or threading.local().scene_robot + scene_robot = scene_robot or thread_scene_robot[threading.get_ident()] scene, robot = scene_robot init_state.restore(scene, robot) @@ -70,14 +73,18 @@ def step(weights, scene_robot: Tuple[Scene, Robot] = None): return -reward_sum + used_idx = 0 + lock = 
threading.Lock() def register_thread(): - scene_robot = next(r for r in scenes_robots if r not in thread_robots.values()) - threading.local().scene_robot = scene_robot + nonlocal used_idx, thread_scene_robot + with lock: + thread_scene_robot[threading.get_ident()] = scenes_robots[used_idx] + used_idx += 1 weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=len(robots)) + dimension=inst_step.dimension, budget=num_iteration, num_workers=len(scenes_robots)) with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) @@ -91,7 +98,10 @@ def register_thread(): def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): - chunk_duration = scene.dt * chunk_length + scenes_robots = [make_scene(i) for i in range(num_workers)] + first_scene, first_robot = scenes_robots[0] + + chunk_duration = first_scene.dt * chunk_length if num_chunk is None: num_chunk = math.ceil(motion.length() / chunk_duration) @@ -104,15 +114,13 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], if total_length < motion.length(): log.warning(f"A total length to train is shorter than the length of motion") - num_frames = int(motion.length() / scene.dt) + num_frames = int(motion.length() / first_scene.dt) num_joints = len(list(motion.joint_names())) # TODO: Call len() directly weights = np.zeros(shape=(num_frames, num_joints)) log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - scenes_robots = [make_scene(i) for i in range(num_workers)] - - last_state = StateWithJoints.save(scene, robot) + last_state = StateWithJoints.save(first_scene, 
first_robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration start_idx = chunk_idx * chunk_length % num_frames From 6f6e5d3bf38a33655de4faacee543be65e8f2acf Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:52:40 +0000 Subject: [PATCH 09/12] Add: Add Env class to use scene, robot, state as one object --- trainer/train.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/trainer/train.py b/trainer/train.py index 72aaaba..b6e3ac6 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -42,6 +42,20 @@ def save(scene: Scene, robot: Robot): return StateWithJoints(scene.save_state(), torques) +@dataclasses.dataclass +class Env: + scene: Scene + robot: Robot + + state: Optional[StateWithJoints] = None + + def save(self): + self.state = StateWithJoints.save(self.scene, self.robot) + + def restore(self): + self.state.restore(self.scene, self.robot) + + def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape From 7c00475df72211d5c7591c3a0888b1c3b186a955 Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:59:00 +0000 Subject: [PATCH 10/12] Change: Save on construction --- trainer/train.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/trainer/train.py b/trainer/train.py index b6e3ac6..e2be96d 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -49,6 +49,9 @@ class Env: state: Optional[StateWithJoints] = None + def __post_init__(self): + self.save() + def save(self): self.state = StateWithJoints.save(self.scene, self.robot) From 37c850d1a0fbf7eadf3dbf861896d7715250a40f Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 09:59:19 +0000 Subject: [PATCH 11/12] Change: Entirely rewrite with Env --- trainer/train.py | 48 
+++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/trainer/train.py b/trainer/train.py index e2be96d..f8a3f38 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -59,32 +59,31 @@ def restore(self): self.state.restore(self.scene, self.robot) -def train_chunk(motion: flom.Motion, scenes_robots: List[Tuple[Scene, Robot]], start: float, init_weights: np.ndarray, init_state: StateWithJoints, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): +def train_chunk(motion: flom.Motion, envs: List[Env], start: float, init_weights: np.ndarray, *, algorithm: str = 'OnePlusOne', num_iteration: int = 1000, weight_factor: float = 0.01, stddev: float = 1, **kwargs): weight_shape = np.array(init_weights).shape - thread_scene_robot = {} # type: Dict[int, Tuple[Scene, Robot]] + thread_envs = {} # type: Dict[int, Env] - def step(weights, scene_robot: Tuple[Scene, Robot] = None): - scene_robot = scene_robot or thread_scene_robot[threading.get_ident()] + def step(weights, env: Env = None): + env = env or thread_envs[threading.get_ident()] - scene, robot = scene_robot - init_state.restore(scene, robot) + env.restore() reward_sum = 0 - start_ts = scene.ts + start_ts = env.scene.ts - pre_positions = try_get_pre_positions(scene, motion, start=start) + pre_positions = try_get_pre_positions(env.scene, motion, start=start) for init_weight, frame_weight in zip(init_weights, weights): - frame = motion.frame_at(start + scene.ts - start_ts) + frame = motion.frame_at(start + env.scene.ts - start_ts) frame.positions = apply_weights( frame.positions, init_weight + frame_weight * weight_factor) - apply_joints(robot, frame.positions) + apply_joints(env.robot, frame.positions) - scene.step() + env.scene.step() - reward_sum += calc_reward(motion, robot, frame, pre_positions, **kwargs) + reward_sum += calc_reward(motion, env.robot, frame, pre_positions, **kwargs) pre_positions 
= frame.positions @@ -93,32 +92,32 @@ def step(weights, scene_robot: Tuple[Scene, Robot] = None): used_idx = 0 lock = threading.Lock() def register_thread(): - nonlocal used_idx, thread_scene_robot + nonlocal used_idx, thread_envs with lock: - thread_scene_robot[threading.get_ident()] = scenes_robots[used_idx] + thread_envs[threading.get_ident()] = envs[used_idx] used_idx += 1 weights_param = Gaussian(mean=0, std=stddev, shape=weight_shape) inst_step = InstrumentedFunction(step, weights_param) optimizer = optimizerlib.registry[algorithm]( - dimension=inst_step.dimension, budget=num_iteration, num_workers=len(scenes_robots)) + dimension=inst_step.dimension, budget=num_iteration, num_workers=len(envs)) with futures.ThreadPoolExecutor(max_workers=optimizer.num_workers, initializer=register_thread) as executor: recommendation = optimizer.optimize(inst_step, executor=executor) weights = np.reshape(recommendation, weight_shape) - scene, robot = scenes_robots[0] - reward = step(weights, (scene, robot)) + for e in envs: + reward = step(weights, e) + e.save() - state = StateWithJoints.save(scene, robot) - return reward, weights * weight_factor, state + return reward, weights * weight_factor def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], *, num_workers: int = 5, chunk_length: int = 3, num_chunk: Optional[int] = None, **kwargs): - scenes_robots = [make_scene(i) for i in range(num_workers)] - first_scene, first_robot = scenes_robots[0] + envs = [Env(*make_scene(i)) for i in range(num_workers)] + first_env = envs[0] - chunk_duration = first_scene.dt * chunk_length + chunk_duration = first_env.scene.dt * chunk_length if num_chunk is None: num_chunk = math.ceil(motion.length() / chunk_duration) @@ -131,13 +130,12 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], if total_length < motion.length(): log.warning(f"A total length to train is shorter than the length of motion") - num_frames = int(motion.length() / 
first_scene.dt) + num_frames = int(motion.length() / first_env.scene.dt) num_joints = len(list(motion.joint_names())) # TODO: Call len() directly weights = np.zeros(shape=(num_frames, num_joints)) log.info(f"shape of weights: {weights.shape}") log.debug(f"kwargs: {kwargs}") - last_state = StateWithJoints.save(first_scene, first_robot) for chunk_idx in range(num_chunk): start = chunk_idx * chunk_duration start_idx = chunk_idx * chunk_length % num_frames @@ -145,7 +143,7 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], r = range(start_idx, start_idx + chunk_length) in_weights = [weights[i % num_frames] for i in r] log.info(f"start training chunk {chunk_idx} ({start}~)") - reward, out_weights, last_state = train_chunk(motion, scenes_robots, start, in_weights, last_state, **kwargs) + reward, out_weights = train_chunk(motion, envs, start, in_weights, **kwargs) for i, w in zip(r, out_weights): weights[i % num_frames] = w From f29932e7e38c922fc46108fdf66bc52a35cea5cd Mon Sep 17 00:00:00 2001 From: "coord.e" Date: Thu, 17 Jan 2019 10:08:46 +0000 Subject: [PATCH 12/12] Fix: Fix typo --- trainer/cli.py | 2 +- trainer/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trainer/cli.py b/trainer/cli.py index 0a67c5e..2d6afc7 100644 --- a/trainer/cli.py +++ b/trainer/cli.py @@ -63,7 +63,7 @@ def train(self, output, **kwargs): def make_scene(_): gui_client = BulletClient(connection_mode=pybullet.DIRECT) scene = Scene(self.timestep, self.frame_skip, client=gui_client) - robot = simulation.reset(self._scene, self.robot) + robot = simulation.reset(scene, self.robot) return scene, robot trained = trainer.train(self._motion, make_scene, **kwargs) trained.dump(output) diff --git a/trainer/train.py b/trainer/train.py index f8a3f38..9f42b78 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -157,7 +157,7 @@ def train(motion: flom.Motion, make_scene: Callable[[int], Tuple[Scene, Robot]], new_motion.set_effector_weight(name, 
motion.effector_weight(name)) for i, frame_weight in enumerate(weights): - t = i * scene.dt + t = i * first_env.scene.dt new_frame = motion.frame_at(t) new_frame.positions = apply_weights(new_frame.positions, frame_weight) new_motion.insert_keyframe(t, new_frame)