diff --git a/.benchmark_pattern b/.benchmark_pattern
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
index a41103d0b7..3387b64eee
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,229 @@
+
+# Created by https://www.gitignore.io/api/emacs,macos,python,jupyternotebook,jupyternotebooks
+# Edit at https://www.gitignore.io/?templates=emacs,macos,python,jupyternotebook,jupyternotebooks
+
+### Emacs ###
+# -*- mode: gitignore; -*-
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+
+# Org-mode
+.org-id-locations
+*_archive
+
+# flymake-mode
+*_flymake.*
+
+# eshell files
+/eshell/history
+/eshell/lastdir
+
+# elpa packages
+/elpa/
+
+# reftex files
+*.rel
+
+# AUCTeX auto folder
+/auto/
+
+# cask packages
+.cask/
+dist/
+
+# Flycheck
+flycheck_*.el
+
+# server auth directory
+/server/
+
+# projectiles files
+.projectile
+
+# directory configuration
+.dir-locals.el
+
+# network security
+/network-security.data
+
+
+### JupyterNotebook ###
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# Remove previous ipynb_checkpoints
+# git rm -r .ipynb_checkpoints/
+#
+
+### JupyterNotebooks ###
+# gitignore template for Jupyter Notebooks
+# website: http://jupyter.org/
+
+
+# Remove previous ipynb_checkpoints
+# git rm -r .ipynb_checkpoints/
+#
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### Python Patch ###
+.venv/
+
+# End of https://www.gitignore.io/api/emacs,macos,python,jupyternotebook,jupyternotebooks
+
+
+# -------------------------------------------------------------------------------
+# ==================
+# Open AI Settings
+# ==================
+
*.swp
*.pyc
*.pkl
@@ -34,3 +260,6 @@ src
.cache
MUJOCO_LOG.TXT
+TRAIN.sh
+MAKE_TRAINING_DATA.sh
+projection/*.sh
diff --git a/.gitmodules b/.gitmodules
new file mode 100755
index 0000000000..7ad9e4aadf
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "mujoco-py"]
+ path = mujoco-py
+ url = git@github.com:openai/mujoco-py.git
diff --git a/.travis.yml b/.travis.yml
old mode 100644
new mode 100755
diff --git a/Dockerfile b/Dockerfile
old mode 100644
new mode 100755
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
index e4f8697d09..2d5ef49525
--- a/README.md
+++ b/README.md
@@ -1,26 +1,54 @@
- [![Build status](https://travis-ci.org/openai/baselines.svg?branch=master)](https://travis-ci.org/openai/baselines)
+# HumanWareFundemental: Sysngy Team
+This repository is cloned from [openai/baselines](https://github.com/openai/baselines) and modified for our research. Do not open pull requests against the original repository.
-# Baselines
-OpenAI Baselines is a set of high-quality implementations of reinforcement learning algorithms.
+## Train model with DDPG
+以下のコマンドで学習済みモデルを作成する. tensorflowのモデルを保存するディレクトリを`--logdir_tf`で指定する.
+
+例
+```
+python -m baselines.her.experiment.train \
+ --env GraspBlock-v0 \
+ --num_cpu 1 \
+ --n_epochs 100 \
+ --logdir_tf < Directory path to save tensorflow model >
+```
+
+
+## Action and Q-value Generation
+以下のコマンドで学習モデルをロードし, 指定したディレクトリにアクションなどを書き出す. `--logdir_tf`で学習済みのモデルを指定し, `--logdir_aq`でactionやQ-valueなどを出力するディレクトリを指定する.
-These algorithms will make it easier for the research community to replicate, refine, and identify new ideas, and will create good baselines to build research on top of. Our DQN implementation and its variants are roughly on par with the scores in published papers. We expect they will be used as a base around which new ideas can be added, and as a tool for comparing a new approach against existing ones.
-## Prerequisites
-Baselines requires python3 (>=3.5) with the development headers. You'll also need system packages CMake, OpenMPI and zlib. Those can be installed as follows
-### Ubuntu
-
-```bash
-sudo apt-get update && sudo apt-get install cmake libopenmpi-dev python3-dev zlib1g-dev
```
-
-### Mac OS X
-Installation of system packages on Mac requires [Homebrew](https://brew.sh). With Homebrew installed, run the following:
-```bash
-brew install cmake openmpi
+python -m baselines.her.experiment.test \
+ --env GraspBlock-v0 \
+ --num_cpu 1 --n_epochs 5 \
+ --logdir_tf < path to saved model > \
+ --logdir_aq < path to save actions etc... >
```
-
-## Virtual environment
+
+### Log File
+ログファイルには以下の項目が記述されている.
+
++ `goal/desired`: ゴール (`g`)
++ `goal/achieved`: 到達点 (`ag`)
++ `observation`: 観測 (`o`)
++ `action`: action, shape=[EpisodeNo, Batch, Sequence, env.action_space]
++ `Qvalue`: Q-value, shape=[EpisodeNo, Batch, Sequence, env.action_space]
++ `fc`: Critic Networkの中間出力 (fc2), shape=[EpisodeNo, Batch, Sequence, n_unit(=256)]
+
+
+
+
+
+--------------------------------------
+## Memo
+TBA
+
+
+----------------------------------------
+## Initial Setup
+### Virtual environment
From the general python package sanity perspective, it is a good idea to use virtual environments (virtualenvs) to make sure packages from different projects do not interfere with each other. You can install virtualenv (which is itself a pip package) via
```bash
pip install virtualenv
@@ -37,7 +65,7 @@ To activate a virtualenv:
More thorough tutorial on virtualenvs and options can be found [here](https://virtualenv.pypa.io/en/stable/)
-## Installation
+### Installation
- Clone the repo and cd into it:
```bash
git clone https://github.com/openai/baselines.git
@@ -59,89 +87,16 @@ More thorough tutorial on virtualenvs and options can be found [here](https://vi
pip install -e .
```
-### MuJoCo
-Some of the baselines examples use [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py)
-
-## Testing the installation
-All unit tests in baselines can be run using pytest runner:
-```
-pip install pytest
-pytest
-```
+- Install our custom environment (`gym-grasp`):
-## Training models
-Most of the algorithms in baselines repo are used as follows:
```bash
-python -m baselines.run --alg= --env= [additional arguments]
+cd gym-grasp
+pip install -e .
```
-### Example 1. PPO with MuJoCo Humanoid
-For instance, to train a fully-connected network controlling MuJoCo humanoid using PPO2 for 20M timesteps
-```bash
-python -m baselines.run --alg=ppo2 --env=Humanoid-v2 --network=mlp --num_timesteps=2e7
-```
-Note that for mujoco environments fully-connected network is default, so we can omit `--network=mlp`
-The hyperparameters for both network and the learning algorithm can be controlled via the command line, for instance:
-```bash
-python -m baselines.run --alg=ppo2 --env=Humanoid-v2 --network=mlp --num_timesteps=2e7 --ent_coef=0.1 --num_hidden=32 --num_layers=3 --value_network=copy
-```
-will set entropy coefficient to 0.1, and construct fully connected network with 3 layers with 32 hidden units in each, and create a separate network for value function estimation (so that its parameters are not shared with the policy network, but the structure is the same)
-
-See docstrings in [common/models.py](baselines/common/models.py) for description of network parameters for each type of model, and
-docstring for [baselines/ppo2/ppo2.py/learn()](baselines/ppo2/ppo2.py#L152) for the description of the ppo2 hyperparamters.
-
-### Example 2. DQN on Atari
-DQN with Atari is at this point a classics of benchmarks. To run the baselines implementation of DQN on Atari Pong:
-```
-python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6
-```
-
-## Saving, loading and visualizing models
-The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models.
-`--save_path` and `--load_path` command-line option loads the tensorflow state from a given path before training, and saves it after the training, respectively.
-Let's imagine you'd like to train ppo2 on Atari Pong, save the model and then later visualize what has it learnt.
-```bash
-python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2
-```
-This should get to the mean reward per episode about 20. To load and visualize the model, we'll do the following - load the model, train it for 0 steps, and then visualize:
-```bash
-python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --load_path=~/models/pong_20M_ppo2 --play
-```
-
-*NOTE:* At the moment Mujoco training uses VecNormalize wrapper for the environment which is not being saved correctly; so loading the models trained on Mujoco will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd by TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](baselines/common/vec_env/vec_normalize.py#L12). This way, mean and std of environment normalizing wrapper will be saved in tensorflow variables and included in the model file; however, training is slower that way - hence not including it by default
-
-## Loading and vizualizing learning curves and other training metrics
-See [here](docs/viz/viz.ipynb) for instructions on how to load and display the training data.
-## Subpackages
-- [A2C](baselines/a2c)
-- [ACER](baselines/acer)
-- [ACKTR](baselines/acktr)
-- [DDPG](baselines/ddpg)
-- [DQN](baselines/deepq)
-- [GAIL](baselines/gail)
-- [HER](baselines/her)
-- [PPO1](baselines/ppo1) (obsolete version, left here temporarily)
-- [PPO2](baselines/ppo2)
-- [TRPO](baselines/trpo_mpi)
+### MuJoCo
+Some of the baselines examples use [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py)
-## Benchmarks
-Results of benchmarks on Mujoco (1M timesteps) and Atari (10M timesteps) are available
-[here for Mujoco](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_mujoco1M.htm)
-and
-[here for Atari](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_atari10M.htm)
-respectively. Note that these results may be not on the latest version of the code, particular commit hash with which results were obtained is specified on the benchmarks page.
-
-To cite this repository in publications:
-
- @misc{baselines,
- author = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai and Zhokhov, Peter},
- title = {OpenAI Baselines},
- year = {2017},
- publisher = {GitHub},
- journal = {GitHub repository},
- howpublished = {\url{https://github.com/openai/baselines}},
- }
-
diff --git a/baselines/__init__.py b/baselines/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/a2c/README.md b/baselines/a2c/README.md
old mode 100644
new mode 100755
diff --git a/baselines/a2c/__init__.py b/baselines/a2c/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/a2c/a2c.py b/baselines/a2c/a2c.py
old mode 100644
new mode 100755
diff --git a/baselines/a2c/runner.py b/baselines/a2c/runner.py
old mode 100644
new mode 100755
diff --git a/baselines/a2c/utils.py b/baselines/a2c/utils.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/README.md b/baselines/acer/README.md
old mode 100644
new mode 100755
diff --git a/baselines/acer/__init__.py b/baselines/acer/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/buffer.py b/baselines/acer/buffer.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/defaults.py b/baselines/acer/defaults.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/policies.py b/baselines/acer/policies.py
old mode 100644
new mode 100755
diff --git a/baselines/acer/runner.py b/baselines/acer/runner.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/README.md b/baselines/acktr/README.md
old mode 100644
new mode 100755
diff --git a/baselines/acktr/__init__.py b/baselines/acktr/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/acktr.py b/baselines/acktr/acktr.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/defaults.py b/baselines/acktr/defaults.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/kfac_utils.py b/baselines/acktr/kfac_utils.py
old mode 100644
new mode 100755
diff --git a/baselines/acktr/utils.py b/baselines/acktr/utils.py
old mode 100644
new mode 100755
diff --git a/baselines/bench/__init__.py b/baselines/bench/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/bench/benchmarks.py b/baselines/bench/benchmarks.py
old mode 100644
new mode 100755
diff --git a/baselines/bench/monitor.py b/baselines/bench/monitor.py
old mode 100644
new mode 100755
diff --git a/baselines/common/__init__.py b/baselines/common/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/common/atari_wrappers.py b/baselines/common/atari_wrappers.py
old mode 100644
new mode 100755
diff --git a/baselines/common/cg.py b/baselines/common/cg.py
old mode 100644
new mode 100755
diff --git a/baselines/common/cmd_util.py b/baselines/common/cmd_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/console_util.py b/baselines/common/console_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/dataset.py b/baselines/common/dataset.py
old mode 100644
new mode 100755
diff --git a/baselines/common/distributions.py b/baselines/common/distributions.py
old mode 100644
new mode 100755
diff --git a/baselines/common/input.py b/baselines/common/input.py
old mode 100644
new mode 100755
diff --git a/baselines/common/math_util.py b/baselines/common/math_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/misc_util.py b/baselines/common/misc_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/models.py b/baselines/common/models.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_adam.py b/baselines/common/mpi_adam.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_adam_optimizer.py b/baselines/common/mpi_adam_optimizer.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_fork.py b/baselines/common/mpi_fork.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_moments.py b/baselines/common/mpi_moments.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_running_mean_std.py b/baselines/common/mpi_running_mean_std.py
old mode 100644
new mode 100755
diff --git a/baselines/common/mpi_util.py b/baselines/common/mpi_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/plot_util.py b/baselines/common/plot_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/policies.py b/baselines/common/policies.py
old mode 100644
new mode 100755
diff --git a/baselines/common/retro_wrappers.py b/baselines/common/retro_wrappers.py
old mode 100644
new mode 100755
diff --git a/baselines/common/runners.py b/baselines/common/runners.py
old mode 100644
new mode 100755
diff --git a/baselines/common/running_mean_std.py b/baselines/common/running_mean_std.py
old mode 100644
new mode 100755
diff --git a/baselines/common/schedules.py b/baselines/common/schedules.py
old mode 100644
new mode 100755
diff --git a/baselines/common/segment_tree.py b/baselines/common/segment_tree.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/__init__.py b/baselines/common/tests/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/envs/__init__.py b/baselines/common/tests/envs/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/envs/fixed_sequence_env.py b/baselines/common/tests/envs/fixed_sequence_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/envs/identity_env.py b/baselines/common/tests/envs/identity_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/envs/mnist_env.py b/baselines/common/tests/envs/mnist_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_cartpole.py b/baselines/common/tests/test_cartpole.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_doc_examples.py b/baselines/common/tests/test_doc_examples.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_env_after_learn.py b/baselines/common/tests/test_env_after_learn.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_fixed_sequence.py b/baselines/common/tests/test_fixed_sequence.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_identity.py b/baselines/common/tests/test_identity.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_mnist.py b/baselines/common/tests/test_mnist.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_schedules.py b/baselines/common/tests/test_schedules.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_segment_tree.py b/baselines/common/tests/test_segment_tree.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_serialization.py b/baselines/common/tests/test_serialization.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/test_tf_util.py b/baselines/common/tests/test_tf_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tests/util.py b/baselines/common/tests/util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tf_util.py b/baselines/common/tf_util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/tile_images.py b/baselines/common/tile_images.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/__init__.py b/baselines/common/vec_env/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/dummy_vec_env.py b/baselines/common/vec_env/dummy_vec_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/shmem_vec_env.py b/baselines/common/vec_env/shmem_vec_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/subproc_vec_env.py b/baselines/common/vec_env/subproc_vec_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/test_vec_env.py b/baselines/common/vec_env/test_vec_env.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/test_video_recorder.py b/baselines/common/vec_env/test_video_recorder.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/util.py b/baselines/common/vec_env/util.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/vec_frame_stack.py b/baselines/common/vec_env/vec_frame_stack.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/vec_monitor.py b/baselines/common/vec_env/vec_monitor.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/vec_normalize.py b/baselines/common/vec_env/vec_normalize.py
old mode 100644
new mode 100755
diff --git a/baselines/common/vec_env/vec_video_recorder.py b/baselines/common/vec_env/vec_video_recorder.py
old mode 100644
new mode 100755
diff --git a/baselines/custom_logger.py b/baselines/custom_logger.py
new file mode 100755
index 0000000000..ee17699142
--- /dev/null
+++ b/baselines/custom_logger.py
@@ -0,0 +1,15 @@
+import datetime as dt
+
+"""
+For color escape codes, check this site.
++ https://qiita.com/ironguy/items/8fb3ddadb3c4c986496d
+"""
+
+class CustomLoggerObject(object):
+ def __init__(self):
+ self.LOG_FMT = "{color}| {asctime} | {levelname:<5s} | {message} \033[0m"
+
+ def info(self, msg):
+ asctime = dt.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
+ print(self.LOG_FMT.format(color="\033[37m", asctime=asctime, levelname="INFO", message=msg))
+
diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 37551d4931..35c8e17782 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -11,6 +11,11 @@
import baselines.common.tf_util as U
from baselines import logger
+# --------------------------------------------------------------------------------------
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
import numpy as np
try:
@@ -118,6 +123,7 @@ def learn(network, env,
start_time = time.time()
+ clogger.info("Start Training [nb_epochs={}]".format(nb_epochs))
epoch_episode_rewards = []
epoch_episode_steps = []
@@ -125,6 +131,7 @@ def learn(network, env,
epoch_qs = []
epoch_episodes = 0
for epoch in range(nb_epochs):
+ clogger.info("Start Epoch={}".format(epoch))
for cycle in range(nb_epoch_cycles):
# Perform rollouts.
if nenvs > 1:
@@ -134,7 +141,7 @@ def learn(network, env,
for t_rollout in range(nb_rollout_steps):
# Predict next action.
action, q, _, _ = agent.step(obs, apply_noise=True, compute_Q=True)
-
+ clogger.info("action.shape={}, q={}".format(action.shape, q))
# Execute next action.
if rank == 0 and render:
env.render()
@@ -210,6 +217,7 @@ def learn(network, env,
mpi_size = MPI.COMM_WORLD.Get_size()
else:
mpi_size = 1
+ clogger.info("Finish Training {}".format(time.time()))
# Log stats.
# XXX shouldn't call np.mean on variable length lists
diff --git a/baselines/deepq/README.md b/baselines/deepq/README.md
old mode 100644
new mode 100755
diff --git a/baselines/deepq/__init__.py b/baselines/deepq/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/build_graph.py b/baselines/deepq/build_graph.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/deepq.py b/baselines/deepq/deepq.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/defaults.py b/baselines/deepq/defaults.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/__init__.py b/baselines/deepq/experiments/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/custom_cartpole.py b/baselines/deepq/experiments/custom_cartpole.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/enjoy_cartpole.py b/baselines/deepq/experiments/enjoy_cartpole.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/enjoy_mountaincar.py b/baselines/deepq/experiments/enjoy_mountaincar.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/enjoy_pong.py b/baselines/deepq/experiments/enjoy_pong.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/train_cartpole.py b/baselines/deepq/experiments/train_cartpole.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/train_mountaincar.py b/baselines/deepq/experiments/train_mountaincar.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/experiments/train_pong.py b/baselines/deepq/experiments/train_pong.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/models.py b/baselines/deepq/models.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/replay_buffer.py b/baselines/deepq/replay_buffer.py
old mode 100644
new mode 100755
diff --git a/baselines/deepq/utils.py b/baselines/deepq/utils.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/README.md b/baselines/gail/README.md
old mode 100644
new mode 100755
diff --git a/baselines/gail/__init__.py b/baselines/gail/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/adversary.py b/baselines/gail/adversary.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/behavior_clone.py b/baselines/gail/behavior_clone.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/dataset/__init__.py b/baselines/gail/dataset/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/dataset/mujoco_dset.py b/baselines/gail/dataset/mujoco_dset.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/gail-eval.py b/baselines/gail/gail-eval.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/mlp_policy.py b/baselines/gail/mlp_policy.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HalfCheetah-normalized-deterministic-scores.png b/baselines/gail/result/HalfCheetah-normalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HalfCheetah-normalized-stochastic-scores.png b/baselines/gail/result/HalfCheetah-normalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HalfCheetah-unnormalized-deterministic-scores.png b/baselines/gail/result/HalfCheetah-unnormalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HalfCheetah-unnormalized-stochastic-scores.png b/baselines/gail/result/HalfCheetah-unnormalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Hopper-normalized-deterministic-scores.png b/baselines/gail/result/Hopper-normalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Hopper-normalized-stochastic-scores.png b/baselines/gail/result/Hopper-normalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Hopper-unnormalized-deterministic-scores.png b/baselines/gail/result/Hopper-unnormalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Hopper-unnormalized-stochastic-scores.png b/baselines/gail/result/Hopper-unnormalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Humanoid-normalized-deterministic-scores.png b/baselines/gail/result/Humanoid-normalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Humanoid-normalized-stochastic-scores.png b/baselines/gail/result/Humanoid-normalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Humanoid-unnormalized-deterministic-scores.png b/baselines/gail/result/Humanoid-unnormalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Humanoid-unnormalized-stochastic-scores.png b/baselines/gail/result/Humanoid-unnormalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HumanoidStandup-normalized-deterministic-scores.png b/baselines/gail/result/HumanoidStandup-normalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HumanoidStandup-normalized-stochastic-scores.png b/baselines/gail/result/HumanoidStandup-normalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HumanoidStandup-unnormalized-deterministic-scores.png b/baselines/gail/result/HumanoidStandup-unnormalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/HumanoidStandup-unnormalized-stochastic-scores.png b/baselines/gail/result/HumanoidStandup-unnormalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Walker2d-normalized-deterministic-scores.png b/baselines/gail/result/Walker2d-normalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Walker2d-normalized-stochastic-scores.png b/baselines/gail/result/Walker2d-normalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Walker2d-unnormalized-deterministic-scores.png b/baselines/gail/result/Walker2d-unnormalized-deterministic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/Walker2d-unnormalized-stochastic-scores.png b/baselines/gail/result/Walker2d-unnormalized-stochastic-scores.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/gail-result.md b/baselines/gail/result/gail-result.md
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/halfcheetah-training.png b/baselines/gail/result/halfcheetah-training.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/hopper-training.png b/baselines/gail/result/hopper-training.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/humanoid-training.png b/baselines/gail/result/humanoid-training.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/humanoidstandup-training.png b/baselines/gail/result/humanoidstandup-training.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/result/walker2d-training.png b/baselines/gail/result/walker2d-training.png
old mode 100644
new mode 100755
diff --git a/baselines/gail/run_mujoco.py b/baselines/gail/run_mujoco.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/statistics.py b/baselines/gail/statistics.py
old mode 100644
new mode 100755
diff --git a/baselines/gail/trpo_mpi.py b/baselines/gail/trpo_mpi.py
old mode 100644
new mode 100755
diff --git a/baselines/her/README.md b/baselines/her/README.md
old mode 100644
new mode 100755
diff --git a/baselines/her/__init__.py b/baselines/her/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/her/actor_critic.py b/baselines/her/actor_critic.py
old mode 100644
new mode 100755
index d5443fe0c3..596b234f93
--- a/baselines/her/actor_critic.py
+++ b/baselines/her/actor_critic.py
@@ -32,8 +32,33 @@ def __init__(self, inputs_tf, dimo, dimg, dimu, max_u, o_stats, g_stats, hidden,
# Networks.
with tf.variable_scope('pi'):
- self.pi_tf = self.max_u * tf.tanh(nn(
- input_pi, [self.hidden] * self.layers + [self.dimu]))
+ # self.pi_tf = self.max_u * tf.tanh(nn(
+ # input_pi, [self.hidden] * self.layers + [self.dimu]))
+
+ # 3-Layers FC Network
+ ## FC1
+ fc1 = tf.layers.dense(inputs=input_pi,
+ units=self.hidden,
+ kernel_initializer=tf.contrib.layers.xavier_initializer(),
+ reuse=None,
+ name='fc1')
+ fc1 = tf.nn.relu(fc1)
+ ## FC2
+ fc2 = tf.layers.dense(inputs=fc1,
+ units=self.hidden,
+ kernel_initializer=tf.contrib.layers.xavier_initializer(),
+ reuse=None,
+ name='fc2')
+ fc2 = tf.nn.relu(fc2)
+ ## FC3
+ fc3 = tf.layers.dense(inputs=fc2,
+ units=self.dimu,
+ kernel_initializer=tf.contrib.layers.xavier_initializer(),
+ reuse=None,
+ name='fc3')
+ self.pi_tf_fc2 = fc2
+ self.pi_tf = fc3
+
with tf.variable_scope('Q'):
# for policy training
input_Q = tf.concat(axis=1, values=[o, g, self.pi_tf / self.max_u])
diff --git a/baselines/her/ddpg.py b/baselines/her/ddpg.py
old mode 100644
new mode 100755
index 96384da4c4..6a06d96f6d
--- a/baselines/her/ddpg.py
+++ b/baselines/her/ddpg.py
@@ -120,13 +120,13 @@ def _preprocess_og(self, o, ag, g):
return o, g
def get_actions(self, o, ag, g, noise_eps=0., random_eps=0., use_target_net=False,
- compute_Q=False):
+ compute_Q=False,):
o, g = self._preprocess_og(o, ag, g)
policy = self.target if use_target_net else self.main
# values to compute
vals = [policy.pi_tf]
if compute_Q:
- vals += [policy.Q_pi_tf]
+ vals += [policy.Q_pi_tf, policy.pi_tf_fc2]
# feed
feed = {
policy.o_tf: o.reshape(-1, self.dimo),
@@ -150,6 +150,7 @@ def get_actions(self, o, ag, g, noise_eps=0., random_eps=0., use_target_net=Fals
return ret[0]
else:
return ret
+
def initDemoBuffer(self, demoDataFile, update_stats=True): #function that initializes the demo buffer
diff --git a/baselines/her/experiment/__init__.py b/baselines/her/experiment/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/her/experiment/config.py b/baselines/her/experiment/config.py
old mode 100644
new mode 100755
index 8cc36e6ee1..5ac6dadb02
--- a/baselines/her/experiment/config.py
+++ b/baselines/her/experiment/config.py
@@ -1,10 +1,18 @@
import numpy as np
import gym
+import gym_grasp
from baselines import logger
from baselines.her.ddpg import DDPG
from baselines.her.her import make_sample_her_transitions
+# --------------------------------------------------------------------------------------
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
+
+
DEFAULT_ENV_PARAMS = {
'FetchReach-v1': {
@@ -170,12 +178,16 @@ def configure_dims(params):
env = cached_make_env(params['make_env'])
env.reset()
obs, _, _, info = env.step(env.action_space.sample())
+
dims = {
'o': obs['observation'].shape[0],
'u': env.action_space.shape[0],
'g': obs['desired_goal'].shape[0],
}
+ clogger.info("input_dims = {}".format(dims))
+ clogger.info("env.action_apace={}".format(env.action_space))
+ clogger.info("env.observation_space={}".format(env.observation_space))
for key, value in info.items():
value = np.array(value)
if value.ndim == 0:
diff --git a/baselines/her/experiment/data_generation/fetch_data_generation.py b/baselines/her/experiment/data_generation/fetch_data_generation.py
old mode 100644
new mode 100755
diff --git a/baselines/her/experiment/play.py b/baselines/her/experiment/play.py
old mode 100644
new mode 100755
diff --git a/baselines/her/experiment/plot.py b/baselines/her/experiment/plot.py
old mode 100644
new mode 100755
diff --git a/baselines/her/experiment/test.py b/baselines/her/experiment/test.py
new file mode 100755
index 0000000000..6095643c09
--- /dev/null
+++ b/baselines/her/experiment/test.py
@@ -0,0 +1,241 @@
+import os
+import sys
+
+import click
+import numpy as np
+import json
+from mpi4py import MPI
+
+from baselines import logger
+from baselines.common import set_global_seeds
+from baselines.common.mpi_moments import mpi_moments
+import baselines.her.experiment.config as config
+from baselines.her.rollout import RolloutWorker
+from baselines.her.util import mpi_fork
+
+from subprocess import CalledProcessError
+import h5py
+
+
+# --------------------------------------------------------------------------------------
+# Module-level custom logger shared by the functions in this file.
+# NOTE: the info() call below is executed as a side effect of importing this module.
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
+
+
+def mpi_average(value):
+    """Return the first element of `mpi_moments` for `value` (presumably the mean).
+
+    An empty list is replaced by `[0.]` and a non-list value is wrapped in a
+    one-element list before being converted to a numpy array.
+    """
+    samples = [0.] if value == [] else value
+    if not isinstance(samples, list):
+        samples = [samples]
+    return mpi_moments(np.array(samples))[0]
+
+
+def test(policy, rollout_worker, evaluator,
+         n_epochs, n_test_rollouts, n_cycles, n_batches, policy_save_interval,
+         save_policies, demo_file, logdir_aq,
+         min_num=100, num_axis=5, reward_lambda=1., **kwargs):
+    """Run evaluation rollouts for `n_epochs` epochs and dump them to HDF5.
+
+    For every epoch the evaluator performs `n_test_rollouts` rollouts; the
+    collected goals, observations, actions, Q-values and `fc` activations are
+    stacked and written to `<logdir_aq>/epoch<N>.h5`, and the evaluator and
+    policy statistics are recorded through `logger`. No training step is run.
+
+    `min_num`, `num_axis` and `reward_lambda` are forwarded to
+    `RolloutWorker.generate_rollouts`, which requires them. Their defaults
+    mirror the defaults of the training command-line options
+    (NOTE(review): confirm these are the values wanted for evaluation).
+    `rollout_worker`, `n_cycles`, `n_batches` and `**kwargs` are accepted but
+    not used here.
+    """
+    clogger.info("Logdir for actions & Q-values: {}".format(logdir_aq))
+    rank = MPI.COMM_WORLD.Get_rank()
+
+    latest_policy_path = os.path.join(logger.get_dir(), 'policy_latest.pkl')
+    best_policy_path = os.path.join(logger.get_dir(), 'policy_best.pkl')
+    periodic_policy_path = os.path.join(logger.get_dir(), 'policy_{}.pkl')
+
+    logger.info("Testing...")
+    best_success_rate = -1
+
+    # initialize demo buffer if the policy was configured with demonstrations
+    if policy.bc_loss == 1:
+        policy.initDemoBuffer(demo_file)
+    for epoch in range(n_epochs):
+        clogger.info("Start: Epoch {}/{}".format(epoch, n_epochs))
+
+        # test
+        evaluator.clear_history()
+        episode_box = {"g": [], "ag": [], "o": [], "u": [], "q": [], "fc": []}
+        for _ in range(n_test_rollouts):
+            # generate_rollouts returns (episode, success_u). A fresh list is
+            # passed so its mutable default is not shared between calls.
+            episode, _ = evaluator.generate_rollouts(
+                min_num=min_num, num_axis=num_axis, reward_lambda=reward_lambda,
+                success_u=[], is_train=False)
+            clogger.info("Episode = {}".format(episode.keys()))
+            for key in episode.keys():
+                if key in episode_box.keys():
+                    # add a leading axis so rollouts can be concatenated below
+                    episode_box[key].append(episode[key][np.newaxis, :])
+
+        # Dump episode info
+        for key in episode_box.keys():
+            # length of this key's array in the most recent rollout (logging only)
+            last_len = len(episode[key])
+            episode_box[key] = np.concatenate(episode_box[key], axis=0)
+            clogger.info(" - {:<4}: {:>4} => {}".format(key, last_len, episode_box[key].shape))
+
+        filename = os.path.join(logdir_aq, 'epoch{}.h5'.format(epoch))
+        with h5py.File(filename, 'w') as f:
+            f.create_group('goal')
+            f['goal'].create_dataset('desired', data=episode_box["g"])
+            f['goal'].create_dataset('achieved', data=episode_box["ag"])
+            # NOTE(review): 'obeservation' is misspelled; left unchanged because
+            # readers of existing dumps may look the dataset up by this name.
+            f.create_dataset('obeservation', data=episode_box["o"])
+            f.create_dataset('action', data=episode_box["u"])
+            f.create_dataset('Qvalue', data=episode_box["q"])
+            f.create_dataset('fc', data=episode_box["fc"])
+
+        # record logs
+        logger.record_tabular('epoch', epoch)
+        for key, val in evaluator.logs('test'):
+            logger.record_tabular(key, mpi_average(val))
+        for key, val in policy.logs():
+            logger.record_tabular(key, mpi_average(val))
+
+        if rank == 0:
+            clogger.info("Show table")
+            logger.dump_tabular()
+
+        # save the policy if it's better than the previous ones
+        success_rate = mpi_average(evaluator.current_success_rate())
+        if rank == 0 and success_rate >= best_success_rate and save_policies:
+            best_success_rate = success_rate
+            logger.info('New best success rate: {}. Saving policy to {} ...'.format(best_success_rate, best_policy_path))
+            evaluator.save_policy(best_policy_path)
+            evaluator.save_policy(latest_policy_path)
+        if rank == 0 and policy_save_interval > 0 and epoch % policy_save_interval == 0 and save_policies:
+            policy_path = periodic_policy_path.format(epoch)
+            logger.info('Saving periodic policy to {} ...'.format(policy_path))
+            evaluator.save_policy(policy_path)
+
+        # make sure that different threads have different seeds
+        local_uniform = np.random.uniform(size=(1,))
+        root_uniform = local_uniform.copy()
+        MPI.COMM_WORLD.Bcast(root_uniform, root=0)
+        if rank != 0:
+            assert local_uniform[0] != root_uniform[0]
+
+
+def launch(
+    env, logdir, n_epochs, num_cpu, seed, replay_strategy, policy_save_interval, clip_return,
+    demo_file, logdir_tf=None, logdir_aq=None, override_params={}, save_policies=True
+):
+    """Restore a trained policy from `logdir_tf` and evaluate it with `test`.
+
+    Steps: optionally fork MPI workers, configure logging and seeds, build the
+    parameter dict (defaults, env-specific values, then `override_params`),
+    create the DDPG policy, restore its variables with `tf.train.Saver`, build
+    the rollout workers, make sure `logdir_aq` exists and call `test`.
+
+    `logdir_tf` is required. When `logdir_aq` is not given it defaults to
+    `<logdir_tf>/ActionQvals`. `override_params` is only read, never mutated.
+    """
+    assert logdir_tf, "Test mode need `logdir_tf`"
+    # Fork for multi-CPU MPI implementation.
+    if num_cpu > 1:
+        try:
+            whoami = mpi_fork(num_cpu, ['--bind-to', 'core'])
+        except CalledProcessError:
+            # fancy version of mpi call failed, try simple version
+            whoami = mpi_fork(num_cpu)
+
+        if whoami == 'parent':
+            sys.exit(0)
+        import baselines.common.tf_util as U
+        U.single_threaded_session().__enter__()
+    rank = MPI.COMM_WORLD.Get_rank()
+
+    # Configure logging
+    if rank == 0:
+        if logdir or logger.get_dir() is None:
+            logger.configure(dir=logdir)
+    else:
+        logger.configure()
+    logdir = logger.get_dir()
+    assert logdir is not None
+    os.makedirs(logdir, exist_ok=True)
+
+    # Seed everything.
+    rank_seed = seed + 1000000 * rank
+    set_global_seeds(rank_seed)
+
+    # Prepare params.
+    params = config.DEFAULT_PARAMS
+    params['env_name'] = env
+    params['replay_strategy'] = replay_strategy
+    if env in config.DEFAULT_ENV_PARAMS:
+        params.update(config.DEFAULT_ENV_PARAMS[env])  # merge env-specific parameters in
+    params.update(**override_params)  # makes it possible to override any parameter
+    with open(os.path.join(logger.get_dir(), 'params.json'), 'w') as f:
+        json.dump(params, f)
+    params = config.prepare_params(params)
+    config.log_params(params, logger=logger)
+
+    if num_cpu == 1:
+        logger.warn()
+        logger.warn('*** Warning ***')
+        logger.warn(
+            'You are running HER with just a single MPI worker. This will work, but the ' +
+            'experiments that we report in Plappert et al. (2018, https://arxiv.org/abs/1802.09464) ' +
+            'were obtained with --num_cpu 19. This makes a significant difference and if you ' +
+            'are looking to reproduce those results, be aware of this. Please also refer to ' +
+            'https://github.com/openai/baselines/issues/314 for further details.')
+        logger.warn('****************')
+        logger.warn()
+
+    dims = config.configure_dims(params)
+    policy = config.configure_ddpg(dims=dims, params=params, clip_return=clip_return)
+    # Load Learned Parameters
+    if logdir_tf:
+        import tensorflow as tf
+        saver = tf.train.Saver()
+        saver.restore(policy.sess, logdir_tf)
+
+    rollout_params = {
+        'exploit': False,
+        'use_target_net': False,
+        'use_demo_states': True,
+        'compute_Q': False,
+        'T': params['T'],
+    }
+
+    eval_params = {
+        'exploit': True,
+        'use_target_net': params['test_with_polyak'],
+        'use_demo_states': False,
+        'compute_Q': True,
+        'T': params['T'],
+    }
+
+    for name in ['T', 'rollout_batch_size', 'gamma', 'noise_eps', 'random_eps']:
+        rollout_params[name] = params[name]
+        eval_params[name] = params[name]
+
+    rollout_worker = RolloutWorker(params['make_env'], policy, dims, logger, **rollout_params)
+    rollout_worker.seed(rank_seed)
+
+    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
+    evaluator.seed(rank_seed)
+
+    # Log Directory for actions and qvalues
+    if not logdir_aq:
+        logdir_aq = os.path.join(logdir_tf, "ActionQvals")
+    if not os.path.exists(logdir_aq):
+        clogger.info("Create Logdir to {}".format(logdir_aq))
+    # exist_ok avoids FileExistsError when several MPI workers reach this
+    # point at the same time and another worker creates the directory first.
+    os.makedirs(logdir_aq, exist_ok=True)
+
+    test(
+        logdir=logdir, policy=policy, rollout_worker=rollout_worker,
+        evaluator=evaluator, n_epochs=n_epochs, n_test_rollouts=params['n_test_rollouts'],
+        n_cycles=params['n_cycles'], n_batches=params['n_batches'],
+        policy_save_interval=policy_save_interval, save_policies=save_policies, demo_file=demo_file,
+        logdir_aq=logdir_aq,
+    )
+
+
+@click.command()
+@click.option('--env', type=str, default='FetchReach-v1', help='the name of the OpenAI Gym environment that you want to test on')
+@click.option('--logdir', type=str, default=None, help='the path to where logs and policy pickles should go. If not specified, creates a folder in /tmp/')
+@click.option('--n_epochs', type=int, default=50, help='the number of test epochs to run')
+@click.option('--num_cpu', type=int, default=1, help='the number of CPU cores to use (using MPI)')
+@click.option('--seed', type=int, default=0, help='the random seed used to seed both the environment and the training code')
+@click.option('--policy_save_interval', type=int, default=5, help='the interval with which policy pickles are saved. If set to 0, only the best and latest policy will be pickled.')
+@click.option('--replay_strategy', type=click.Choice(['future', 'none']), default='future', help='the HER replay strategy to be used. "future" uses HER, "none" disables HER.')
+@click.option('--clip_return', type=int, default=1, help='whether or not returns should be clipped')
+@click.option('--demo_file', type=str, default='PATH/TO/DEMO/DATA/FILE.npz', help='demo data file path')
+@click.option('--logdir_tf', type=str, default=None, help='the path of the saved tf.variables to restore (required).')
+@click.option('--logdir_aq', type=str, default=None, help='the directory where per-epoch HDF5 dumps of actions and Q-values are written. Defaults to <logdir_tf>/ActionQvals.')
+def main(**kwargs):
+    # Command-line entry point: forwards every parsed option to launch().
+    # (A '#' comment is used on purpose: click would show a docstring as help text.)
+    clogger.info("Main Func @her.experiment.test")
+    launch(**kwargs)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/baselines/her/experiment/train.py b/baselines/her/experiment/train.py
old mode 100644
new mode 100755
index 82a11f0ad6..ea37343b4d
--- a/baselines/her/experiment/train.py
+++ b/baselines/her/experiment/train.py
@@ -15,6 +15,12 @@
from subprocess import CalledProcessError
+# --------------------------------------------------------------------------------------
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
+
def mpi_average(value):
if value == []:
@@ -24,33 +30,62 @@ def mpi_average(value):
return mpi_moments(np.array(value))[0]
-def train(policy, rollout_worker, evaluator,
+def train(min_num, max_num, num_axis, reward_lambda, # nishimura
+ policy, rollout_worker, evaluator,
n_epochs, n_test_rollouts, n_cycles, n_batches, policy_save_interval,
- save_policies, demo_file, **kwargs):
+ save_policies, demo_file, logdir_init, **kwargs):
rank = MPI.COMM_WORLD.Get_rank()
latest_policy_path = os.path.join(logger.get_dir(), 'policy_latest.pkl')
best_policy_path = os.path.join(logger.get_dir(), 'policy_best.pkl')
periodic_policy_path = os.path.join(logger.get_dir(), 'policy_{}.pkl')
+ best_policy_grasp_path = os.path.join(logger.get_dir(), "grasp_dataset_on_best_policy.npy") # motoda
+ path_to_grasp_dataset = os.path.join(logger.get_dir(), "grasp_dataset_{}.npy") # motoda
+
+ all_success_grasp_path = os.path.join(logger.get_dir(), "total_grasp_dataset.npy") # motoda
+
+ # motoda --
+ success_u = []
+ init_success_u = []
+ path_to_default_grasp_dataset = "model/initial_grasp_pose.npy"
+ if os.path.exists(path_to_default_grasp_dataset):
+ init_success_u = np.load(path_to_default_grasp_dataset) # Load Initial Grasp Pose set
+ init_success_u = (init_success_u.tolist())
+ for tmp_suc in init_success_u:
+ success_u.append(tmp_suc[0:20])
+ print ("Num of grasp : {} ".format(len (success_u)))
+ else:
+ print ("No initial grasp pose")
+ # ---
+
+ # motoda --
+ all_success_u = [] # Dumping grasp_pose
+ # --
logger.info("Training...")
best_success_rate = -1
if policy.bc_loss == 1: policy.initDemoBuffer(demo_file) #initialize demo buffer if training with demonstrations
for epoch in range(n_epochs):
+ clogger.info("Start: Epoch {}/{}".format(epoch, n_epochs))
# train
rollout_worker.clear_history()
+ saved_success_u = []
for _ in range(n_cycles):
- episode = rollout_worker.generate_rollouts()
+ episode, success_tmp = rollout_worker.generate_rollouts(min_num=min_num,num_axis=num_axis,reward_lambda=reward_lambda,success_u=success_u) # nishimura, 雑実装
+ # clogger.info("Episode = {}".format(episode.keys()))
+ # for key in episode.keys():
+ # clogger.info(" - {}: {}".format(key, episode[key].shape))
policy.store_episode(episode)
for _ in range(n_batches):
policy.train()
policy.update_target_net()
+ saved_success_u += success_tmp # motoda
# test
evaluator.clear_history()
for _ in range(n_test_rollouts):
- evaluator.generate_rollouts()
+ evaluator.generate_rollouts(min_num=min_num,num_axis=num_axis,reward_lambda=reward_lambda) # nishimura, 雑実装
# record logs
logger.record_tabular('epoch', epoch)
@@ -71,10 +106,22 @@ def train(policy, rollout_worker, evaluator,
logger.info('New best success rate: {}. Saving policy to {} ...'.format(best_success_rate, best_policy_path))
evaluator.save_policy(best_policy_path)
evaluator.save_policy(latest_policy_path)
+ np.save(best_policy_grasp_path, success_u)
if rank == 0 and policy_save_interval > 0 and epoch % policy_save_interval == 0 and save_policies:
policy_path = periodic_policy_path.format(epoch)
logger.info('Saving periodic policy to {} ...'.format(policy_path))
evaluator.save_policy(policy_path)
+ # -- motoda added
+ grasp_path = path_to_grasp_dataset.format(epoch)
+ logger.info('Saving grasp pose: {} grasps. Saving policy to {} ...'.format(len(saved_success_u), grasp_path))
+ np.save(grasp_path, saved_success_u)
+ # --
+
+ # -- reset : grasp Pose -------
+ # success_u = [] # reset (motoda)
+ # -----------------------------
+
+ success_u = success_u[-max_num:] # nishimura
# make sure that different threads have different seeds
local_uniform = np.random.uniform(size=(1,))
@@ -83,10 +130,17 @@ def train(policy, rollout_worker, evaluator,
if rank != 0:
assert local_uniform[0] != root_uniform[0]
+ all_success_u += saved_success_u # motoda
+
+ # motoda --
+ # Dump every successful grasp pose accumulated in all_success_u, i.e. the
+ # list whose length is reported in the log message below.
+ logger.info('Saving grasp pose: {} grasps. Saving policy to {} ...'.format(len(all_success_u), all_success_grasp_path))
+ np.save(all_success_grasp_path, all_success_u)
+ # --
def launch(
- env, logdir, n_epochs, num_cpu, seed, replay_strategy, policy_save_interval, clip_return,
- demo_file, override_params={}, save_policies=True
+ env, logdir, n_epochs, min_num, max_num, num_axis, reward_lambda, num_cpu, seed, replay_strategy, policy_save_interval, clip_return,
+ demo_file, logdir_tf=None, override_params={}, save_policies=True, logdir_init=None
):
# Fork for multi-CPU MPI implementation.
if num_cpu > 1:
@@ -140,8 +194,26 @@ def launch(
logger.warn('****************')
logger.warn()
+
dims = config.configure_dims(params)
policy = config.configure_ddpg(dims=dims, params=params, clip_return=clip_return)
+ clogger.info(policy.sess)
+ # Prepare for Saving Network
+ clogger.info("logdir_tf: {}".format(logdir_tf))
+ if not logdir_tf == None:
+ clogger.info("Create tc.Saver()")
+ import tensorflow as tf
+ saver = tf.train.Saver()
+
+ # motoda added --
+ # Load Learned Parameters
+ if not logdir_init == None:
+ if logdir_tf == None:
+ import tensorflow as tf
+ saver = tf.train.Saver()
+ saver.restore(policy.sess, logdir_init)
+ clogger.info("Model was successflly loaded [logidr_tf={}]".format(logdir_init))
+ # ---------------
rollout_params = {
'exploit': False,
@@ -170,23 +242,40 @@ def launch(
evaluator.seed(rank_seed)
train(
+ min_num=min_num, max_num=max_num, num_axis=num_axis, reward_lambda=reward_lambda, # nishimura
logdir=logdir, policy=policy, rollout_worker=rollout_worker,
evaluator=evaluator, n_epochs=n_epochs, n_test_rollouts=params['n_test_rollouts'],
n_cycles=params['n_cycles'], n_batches=params['n_batches'],
- policy_save_interval=policy_save_interval, save_policies=save_policies, demo_file=demo_file)
+ policy_save_interval=policy_save_interval, save_policies=save_policies, demo_file=demo_file, logdir_init=logdir_init)
+
+
+ # Save Trained Network
+ if logdir_tf:
+ clogger.info("Save tf.variables to {}".format(logdir_tf))
+ clogger.info(policy.sess)
+ saver.save(policy.sess, logdir_tf)
+ clogger.info("Model was successflly saved [logidr_tf={}]".format(logdir_tf))
@click.command()
@click.option('--env', type=str, default='FetchReach-v1', help='the name of the OpenAI Gym environment that you want to train on')
@click.option('--logdir', type=str, default=None, help='the path to where logs and policy pickles should go. If not specified, creates a folder in /tmp/')
@click.option('--n_epochs', type=int, default=50, help='the number of training epochs to run')
+@click.option('--min_num', type=int, default=100,help='minimum number of success_u whether to run PCA')
+@click.option('--max_num', type=int, default=10000,help='limit of success_u for PCA')
+@click.option('--num_axis', type=int, default=5,help='number of principal components to calculate the reward function')
+@click.option('--reward_lambda', type=float, default=1.,help='a weight for the second term of the reward function')
@click.option('--num_cpu', type=int, default=1, help='the number of CPU cores to use (using MPI)')
@click.option('--seed', type=int, default=0, help='the random seed used to seed both the environment and the training code')
@click.option('--policy_save_interval', type=int, default=5, help='the interval with which policy pickles are saved. If set to 0, only the best and latest policy will be pickled.')
@click.option('--replay_strategy', type=click.Choice(['future', 'none']), default='future', help='the HER replay strategy to be used. "future" uses HER, "none" disables HER.')
@click.option('--clip_return', type=int, default=1, help='whether or not returns should be clipped')
@click.option('--demo_file', type=str, default = 'PATH/TO/DEMO/DATA/FILE.npz', help='demo data file path')
+@click.option('--logdir_tf', type=str, default=None, help='the path to save tf.variables.')
+@click.option('--logdir_init', type=str, default='model/init', help='the path to load default paramater.') # There are meta data at model/init
+
def main(**kwargs):
+ clogger.info("Main Func @her.experiment.train")
launch(**kwargs)
diff --git a/baselines/her/her.py b/baselines/her/her.py
old mode 100644
new mode 100755
diff --git a/baselines/her/normalizer.py b/baselines/her/normalizer.py
old mode 100644
new mode 100755
diff --git a/baselines/her/replay_buffer.py b/baselines/her/replay_buffer.py
old mode 100644
new mode 100755
diff --git a/baselines/her/rollout.py b/baselines/her/rollout.py
old mode 100644
new mode 100755
index e33b92add1..dd5780519a
--- a/baselines/her/rollout.py
+++ b/baselines/her/rollout.py
@@ -7,6 +7,13 @@
from baselines.her.util import convert_episode_to_batch_major, store_args
+# --------------------------------------------------------------------------------------
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
+
+
class RolloutWorker:
@store_args
@@ -61,10 +68,14 @@ def reset_all_rollouts(self):
for i in range(self.rollout_batch_size):
self.reset_rollout(i)
- def generate_rollouts(self):
+ def generate_rollouts(self, min_num, num_axis, reward_lambda, success_u=[], is_train=True): # nishimura
"""Performs `rollout_batch_size` rollouts in parallel for time horizon `T` with the current
policy acting on it accordingly.
"""
+
+ import sklearn
+ from sklearn.decomposition import PCA
+
self.reset_all_rollouts()
# compute observations
@@ -75,6 +86,8 @@ def generate_rollouts(self):
# generate episodes
obs, achieved_goals, acts, goals, successes = [], [], [], [], []
+ q_vals = []
+ fcs = []
info_values = [np.empty((self.T, self.rollout_batch_size, self.dims['info_' + key]), np.float32) for key in self.info_keys]
Qs = []
for t in range(self.T):
@@ -83,11 +96,15 @@ def generate_rollouts(self):
compute_Q=self.compute_Q,
noise_eps=self.noise_eps if not self.exploit else 0.,
random_eps=self.random_eps if not self.exploit else 0.,
- use_target_net=self.use_target_net)
-
+ use_target_net=self.use_target_net,)
+ # clogger.info("compute_Q[{}, {}]: policy_output: {}".format(self.compute_Q, t, policy_output))
+
if self.compute_Q:
- u, Q = policy_output
+ u, Q, fc = policy_output
Qs.append(Q)
+ q_vals.append(Q.copy())
+ if fc.ndim == 1:
+ fc = fc.reshape(1,-1)
else:
u = policy_output
@@ -95,18 +112,32 @@ def generate_rollouts(self):
# The non-batched case should still have a reasonable shape.
u = u.reshape(1, -1)
+
o_new = np.empty((self.rollout_batch_size, self.dims['o']))
ag_new = np.empty((self.rollout_batch_size, self.dims['g']))
success = np.zeros(self.rollout_batch_size)
+
# compute new states and observations
for i in range(self.rollout_batch_size):
+ # -- nishimura 雑実装
+ self.envs[i].num_axis = num_axis
+ self.envs[i].reward_lambda = reward_lambda
+ # --
try:
# We fully ignore the reward here because it will have to be re-computed
# for HER.
curr_o_new, _, _, info = self.envs[i].step(u[i])
if 'is_success' in info:
success[i] = info['is_success']
- o_new[i] = curr_o_new['observation']
+
+ if success[i] > 0:
+ success_u.append(u[i][0:20])
+ if len(success_u)>=min_num: # nishimura
+ pca = PCA()
+ pca.fit(success_u)
+ self.envs[i].variance_ratio.append(pca.explained_variance_ratio_)
+
+ o_new[i] = curr_o_new['observation']
ag_new[i] = curr_o_new['achieved_goal']
for idx, key in enumerate(self.info_keys):
info_values[idx][t, i] = info[key]
@@ -124,6 +155,8 @@ def generate_rollouts(self):
achieved_goals.append(ag.copy())
successes.append(success.copy())
acts.append(u.copy())
+ if self.compute_Q:
+ fcs.append(fc.copy())
goals.append(self.g.copy())
o[...] = o_new
ag[...] = ag_new
@@ -131,10 +164,21 @@ def generate_rollouts(self):
achieved_goals.append(ag.copy())
self.initial_o[:] = o
- episode = dict(o=obs,
- u=acts,
- g=goals,
- ag=achieved_goals)
+ if is_train:
+ episode = dict(o=obs,
+ u=acts,
+ g=goals,
+ ag=achieved_goals
+ )
+ else:
+ episode = dict(o=obs,
+ u=acts,
+ fc=fcs,
+ g=goals,
+ ag=achieved_goals,
+ q=q_vals,
+ )
+
for key, value in zip(self.info_keys, info_values):
episode['info_{}'.format(key)] = value
@@ -147,7 +191,7 @@ def generate_rollouts(self):
self.Q_history.append(np.mean(Qs))
self.n_episodes += self.rollout_batch_size
- return convert_episode_to_batch_major(episode)
+ return convert_episode_to_batch_major(episode), success_u # motoda
def clear_history(self):
"""Clears all histories that are used for statistics
diff --git a/baselines/her/util.py b/baselines/her/util.py
old mode 100644
new mode 100755
diff --git a/baselines/logger.py b/baselines/logger.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/README.md b/baselines/ppo1/README.md
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/__init__.py b/baselines/ppo1/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/cnn_policy.py b/baselines/ppo1/cnn_policy.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/mlp_policy.py b/baselines/ppo1/mlp_policy.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/pposgd_simple.py b/baselines/ppo1/pposgd_simple.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/run_atari.py b/baselines/ppo1/run_atari.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/run_humanoid.py b/baselines/ppo1/run_humanoid.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/run_mujoco.py b/baselines/ppo1/run_mujoco.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo1/run_robotics.py b/baselines/ppo1/run_robotics.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/README.md b/baselines/ppo2/README.md
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/__init__.py b/baselines/ppo2/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/defaults.py b/baselines/ppo2/defaults.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/microbatched_model.py b/baselines/ppo2/microbatched_model.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/model.py b/baselines/ppo2/model.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/ppo2.py b/baselines/ppo2/ppo2.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/runner.py b/baselines/ppo2/runner.py
old mode 100644
new mode 100755
diff --git a/baselines/ppo2/test_microbatches.py b/baselines/ppo2/test_microbatches.py
old mode 100644
new mode 100755
diff --git a/baselines/results_plotter.py b/baselines/results_plotter.py
old mode 100644
new mode 100755
diff --git a/baselines/run.py b/baselines/run.py
old mode 100644
new mode 100755
index c0298f3a43..8ef9cc5b18
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -15,6 +15,14 @@
from baselines.common.vec_env.vec_normalize import VecNormalize
+
+# --------------------------------------------------------------------------------------
+from baselines.custom_logger import CustomLoggerObject
+clogger = CustomLoggerObject()
+clogger.info("MyLogger is working!!")
+# --------------------------------------------------------------------------------------
+
+
try:
from mpi4py import MPI
except ImportError:
diff --git a/baselines/trpo_mpi/README.md b/baselines/trpo_mpi/README.md
old mode 100644
new mode 100755
diff --git a/baselines/trpo_mpi/__init__.py b/baselines/trpo_mpi/__init__.py
old mode 100644
new mode 100755
diff --git a/baselines/trpo_mpi/defaults.py b/baselines/trpo_mpi/defaults.py
old mode 100644
new mode 100755
diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py
old mode 100644
new mode 100755
diff --git a/docs/README.md b/docs/README.md
new file mode 100755
index 0000000000..de5957c176
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,147 @@
+ [![Build status](https://travis-ci.org/openai/baselines.svg?branch=master)](https://travis-ci.org/openai/baselines)
+
+# Baselines
+
+OpenAI Baselines is a set of high-quality implementations of reinforcement learning algorithms.
+
+These algorithms will make it easier for the research community to replicate, refine, and identify new ideas, and will create good baselines to build research on top of. Our DQN implementation and its variants are roughly on par with the scores in published papers. We expect they will be used as a base around which new ideas can be added, and as a tool for comparing a new approach against existing ones.
+
+## Prerequisites
+Baselines requires python3 (>=3.5) with the development headers. You'll also need system packages CMake, OpenMPI and zlib. Those can be installed as follows
+### Ubuntu
+
+```bash
+sudo apt-get update && sudo apt-get install cmake libopenmpi-dev python3-dev zlib1g-dev
+```
+
+### Mac OS X
+Installation of system packages on Mac requires [Homebrew](https://brew.sh). With Homebrew installed, run the following:
+```bash
+brew install cmake openmpi
+```
+
+## Virtual environment
+From the general python package sanity perspective, it is a good idea to use virtual environments (virtualenvs) to make sure packages from different projects do not interfere with each other. You can install virtualenv (which is itself a pip package) via
+```bash
+pip install virtualenv
+```
+Virtualenvs are essentially folders that contain copies of the Python executable and all Python packages.
+To create a virtualenv called venv with python3, one runs
+```bash
+virtualenv /path/to/venv --python=python3
+```
+To activate a virtualenv:
+```
+. /path/to/venv/bin/activate
+```
+A more thorough tutorial on virtualenvs and their options can be found [here](https://virtualenv.pypa.io/en/stable/).
+
+
+## Installation
+- Clone the repo and cd into it:
+ ```bash
+ git clone https://github.com/openai/baselines.git
+ cd baselines
+ ```
+- If you don't have TensorFlow installed already, install your favourite flavor of TensorFlow. In most cases,
+ ```bash
+ pip install tensorflow-gpu # if you have a CUDA-compatible gpu and proper drivers
+ ```
+ or
+ ```bash
+ pip install tensorflow
+ ```
+ should be sufficient. Refer to [TensorFlow installation guide](https://www.tensorflow.org/install/)
+ for more details.
+
+- Install baselines package
+ ```bash
+ pip install -e .
+ ```
+
+### MuJoCo
+Some of the baselines examples use [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py)
+
+## Testing the installation
+All unit tests in baselines can be run using pytest runner:
+```
+pip install pytest
+pytest
+```
+
+## Training models
+Most of the algorithms in baselines repo are used as follows:
+```bash
+python -m baselines.run --alg=<name of the algorithm> --env=<environment_id> [additional arguments]
+```
+### Example 1. PPO with MuJoCo Humanoid
+For instance, to train a fully-connected network controlling MuJoCo humanoid using PPO2 for 20M timesteps
+```bash
+python -m baselines.run --alg=ppo2 --env=Humanoid-v2 --network=mlp --num_timesteps=2e7
+```
+Note that for MuJoCo environments a fully-connected network is the default, so we can omit `--network=mlp`.
+The hyperparameters for both network and the learning algorithm can be controlled via the command line, for instance:
+```bash
+python -m baselines.run --alg=ppo2 --env=Humanoid-v2 --network=mlp --num_timesteps=2e7 --ent_coef=0.1 --num_hidden=32 --num_layers=3 --value_network=copy
+```
+will set the entropy coefficient to 0.1, construct a fully connected network with 3 layers of 32 hidden units each, and create a separate network for value function estimation (so that its parameters are not shared with the policy network, but the structure is the same).
+
+See docstrings in [common/models.py](../baselines/common/models.py) for description of network parameters for each type of model, and
+docstring for [baselines/ppo2/ppo2.py/learn()](../baselines/ppo2/ppo2.py#L152) for the description of the ppo2 hyperparameters.
+
+### Example 2. DQN on Atari
+DQN on Atari is at this point a classic benchmark. To run the baselines implementation of DQN on Atari Pong:
+```
+python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6
+```
+
+## Saving, loading and visualizing models
+The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models.
+The `--save_path` and `--load_path` command-line options save the TensorFlow state to a given path after training and load it from a given path before training, respectively.
+Let's imagine you'd like to train ppo2 on Atari Pong, save the model and then later visualize what it has learnt.
+```bash
+python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2
+```
+This should reach a mean reward per episode of about 20. To load and visualize the model, we'll do the following - load the model, train it for 0 steps, and then visualize:
+```bash
+python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --load_path=~/models/pong_20M_ppo2 --play
+```
+
+*NOTE:* At the moment, MuJoCo training uses the VecNormalize wrapper for the environment, which is not being saved correctly, so loading models trained on MuJoCo will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd with TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](../baselines/common/vec_env/vec_normalize.py#L12). This way, the mean and std of the environment-normalizing wrapper will be saved in TensorFlow variables and included in the model file; however, training is slower that way, which is why it is not the default.
+
+## Loading and visualizing learning curves and other training metrics
+See [here](viz/viz.ipynb) for instructions on how to load and display the training data.
+
+## Subpackages
+
+- [A2C](../baselines/a2c)
+- [ACER](../baselines/acer)
+- [ACKTR](../baselines/acktr)
+- [DDPG](../baselines/ddpg)
+- [DQN](../baselines/deepq)
+- [GAIL](../baselines/gail)
+- [HER](../baselines/her)
+- [PPO1](../baselines/ppo1) (obsolete version, left here temporarily)
+- [PPO2](../baselines/ppo2)
+- [TRPO](../baselines/trpo_mpi)
+
+
+
+## Benchmarks
+Results of benchmarks on Mujoco (1M timesteps) and Atari (10M timesteps) are available
+[here for Mujoco](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_mujoco1M.htm)
+and
+[here for Atari](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_atari10M.htm)
+respectively. Note that these results may not be from the latest version of the code; the particular commit hash with which the results were obtained is specified on the benchmarks page.
+
+To cite this repository in publications:
+
+ @misc{baselines,
+ author = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai and Zhokhov, Peter},
+ title = {OpenAI Baselines},
+ year = {2017},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/openai/baselines}},
+ }
+
diff --git a/benchmarks_atari10M.htm b/docs/benchmarks_atari10M.htm
old mode 100644
new mode 100755
similarity index 100%
rename from benchmarks_atari10M.htm
rename to docs/benchmarks_atari10M.htm
diff --git a/benchmarks_mujoco1M.htm b/docs/benchmarks_mujoco1M.htm
old mode 100644
new mode 100755
similarity index 100%
rename from benchmarks_mujoco1M.htm
rename to docs/benchmarks_mujoco1M.htm
diff --git a/data/cartpole.gif b/docs/data/cartpole.gif
old mode 100644
new mode 100755
similarity index 100%
rename from data/cartpole.gif
rename to docs/data/cartpole.gif
diff --git a/data/fetchPickAndPlaceContrast.png b/docs/data/fetchPickAndPlaceContrast.png
old mode 100644
new mode 100755
similarity index 100%
rename from data/fetchPickAndPlaceContrast.png
rename to docs/data/fetchPickAndPlaceContrast.png
diff --git a/data/logo.jpg b/docs/data/logo.jpg
old mode 100644
new mode 100755
similarity index 100%
rename from data/logo.jpg
rename to docs/data/logo.jpg
diff --git a/docs/viz/viz.ipynb b/docs/viz/viz.ipynb
old mode 100644
new mode 100755
diff --git a/gym-grasp/README.md b/gym-grasp/README.md
new file mode 100644
index 0000000000..cd957b4579
--- /dev/null
+++ b/gym-grasp/README.md
@@ -0,0 +1,20 @@
+# gym_grasp
+
+## GraspBlock
+
+
+# Installation
+
+```bash
+cd gym-grasp
+pip install -e .
+```
+
+# How To Use
+
+```python
+import gym
+import gym_grasp # This includes GraspBlock-v0
+
+env = gym.make('GraspBlock-v0')
+```
diff --git a/gym-grasp/__init__.py b/gym-grasp/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/gym-grasp/gym_grasp/__init__.py b/gym-grasp/gym_grasp/__init__.py
new file mode 100644
index 0000000000..5fa8fec811
--- /dev/null
+++ b/gym-grasp/gym_grasp/__init__.py
@@ -0,0 +1,12 @@
+from gym.envs.registration import register
+
+
+def _merge(a, b):
+ a.update(b)
+ return a
+
+register(
+ id='GraspBlock-v0',
+ entry_point='gym_grasp.envs:GraspBlockEnv',
+ max_episode_steps=100,
+)
\ No newline at end of file
diff --git a/gym-grasp/gym_grasp/envs/README.md b/gym-grasp/gym_grasp/envs/README.md
new file mode 100644
index 0000000000..5dbbfdab4a
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/README.md
@@ -0,0 +1,54 @@
+# Robotics environments
+
+Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
+
+If you use these environments, please cite the following paper:
+
+```
+@misc{1802.09464,
+ Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
+ Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
+ Year = {2018},
+ Eprint = {arXiv:1802.09464},
+}
+```
+
+## Fetch environments
+
+
+[FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
+
+
+
+
+[FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+
+
+
+
+[FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
+
+
+
+
+[FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+
+## Shadow Dexterous Hand environments
+
+
+[HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+
+
+
+
+[HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
+
+
+
+
+[HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
+
+
+
+
+[HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
diff --git a/gym-grasp/gym_grasp/envs/__init__.py b/gym-grasp/gym_grasp/envs/__init__.py
new file mode 100644
index 0000000000..a153f413f2
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/__init__.py
@@ -0,0 +1 @@
+from gym_grasp.envs.hand.grasp_block import GraspBlockEnv
diff --git a/gym-grasp/gym_grasp/envs/assets/LICENSE.md b/gym-grasp/gym_grasp/envs/assets/LICENSE.md
new file mode 100644
index 0000000000..22ce9010d0
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/LICENSE.md
@@ -0,0 +1,222 @@
+# Fetch Robotics
+The model of the [Fetch](http://fetchrobotics.com/platforms-research-development/) is based on [models provided by Fetch](https://github.com/fetchrobotics/fetch_ros/tree/indigo-devel/fetch_description). It was adapted and refined by OpenAI.
+
+# ShadowHand
+The model of the [ShadowHand](https://www.shadowrobot.com/products/dexterous-hand/) is based on [models provided by ShadowRobot](https://github.com/shadow-robot/sr_common/tree/kinetic-devel/sr_description/hand/model), and on code used under the following license:
+
+(C) Vikash Kumar, CSE, UW. Licensed under Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+Additional license notices:
+
+ Sources : 1) Manipulator and Manipulation in High Dimensional Spaces. Vikash Kumar, Ph.D. Thesis, CSE, Univ. of Washington. 2016.
+
+ Mujoco :: Advanced physics simulation engine
+ Source : www.roboti.us
+ Version : 1.40
+ Released : 17Jan'17
+
+ Author :: Vikash Kumar
+ Contacts : vikash@openai.com
+ Last edits : 3Apr'17
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/pick_and_place.xml b/gym-grasp/gym_grasp/envs/assets/fetch/pick_and_place.xml
new file mode 100644
index 0000000000..337032a832
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/pick_and_place.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/push.xml b/gym-grasp/gym_grasp/envs/assets/fetch/push.xml
new file mode 100644
index 0000000000..8e12db248c
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/push.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/reach.xml b/gym-grasp/gym_grasp/envs/assets/fetch/reach.xml
new file mode 100644
index 0000000000..c73d6249f3
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/reach.xml
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/robot.xml b/gym-grasp/gym_grasp/envs/assets/fetch/robot.xml
new file mode 100644
index 0000000000..9ee7723b5e
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/robot.xml
@@ -0,0 +1,123 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/shared.xml b/gym-grasp/gym_grasp/envs/assets/fetch/shared.xml
new file mode 100644
index 0000000000..5d61fef70d
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/shared.xml
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/fetch/slide.xml b/gym-grasp/gym_grasp/envs/assets/fetch/slide.xml
new file mode 100644
index 0000000000..efbfb51bd0
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/fetch/slide.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/grasp_block.xml b/gym-grasp/gym_grasp/envs/assets/hand/grasp_block.xml
new file mode 100644
index 0000000000..b271a2548d
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/grasp_block.xml
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/manipulate_block.xml b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_block.xml
new file mode 100644
index 0000000000..83a6517e6c
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_block.xml
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/manipulate_egg.xml b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_egg.xml
new file mode 100644
index 0000000000..46d1dbba84
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_egg.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/manipulate_pen.xml b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_pen.xml
new file mode 100644
index 0000000000..20a6fb5e06
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/manipulate_pen.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/reach.xml b/gym-grasp/gym_grasp/envs/assets/hand/reach.xml
new file mode 100644
index 0000000000..71f6dfe621
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/reach.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/robot.xml b/gym-grasp/gym_grasp/envs/assets/hand/robot.xml
new file mode 100644
index 0000000000..dbb9e43448
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/robot.xml
@@ -0,0 +1,160 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/robot_for_grasp.xml b/gym-grasp/gym_grasp/envs/assets/hand/robot_for_grasp.xml
new file mode 100644
index 0000000000..a46cc3258b
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/robot_for_grasp.xml
@@ -0,0 +1,165 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/shared.xml b/gym-grasp/gym_grasp/envs/assets/hand/shared.xml
new file mode 100644
index 0000000000..f27f265551
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/shared.xml
@@ -0,0 +1,254 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/hand/shared_asset.xml b/gym-grasp/gym_grasp/envs/assets/hand/shared_asset.xml
new file mode 100644
index 0000000000..9db234f06a
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/assets/hand/shared_asset.xml
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/.get b/gym-grasp/gym_grasp/envs/assets/stls/.get
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/base_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/base_link_collision.stl
new file mode 100644
index 0000000000..1ef459fd5b
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/base_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/bellows_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/bellows_link_collision.stl
new file mode 100644
index 0000000000..a7e5ab75ca
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/bellows_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/elbow_flex_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/elbow_flex_link_collision.stl
new file mode 100644
index 0000000000..b0eea0777a
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/elbow_flex_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/estop_link.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/estop_link.stl
new file mode 100644
index 0000000000..f6d1c72e85
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/estop_link.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/forearm_roll_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/forearm_roll_link_collision.stl
new file mode 100644
index 0000000000..fe468c5406
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/forearm_roll_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/gripper_link.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/gripper_link.stl
new file mode 100644
index 0000000000..8a1487401a
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/gripper_link.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_pan_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_pan_link_collision.stl
new file mode 100644
index 0000000000..c77b5b1872
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_pan_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_tilt_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_tilt_link_collision.stl
new file mode 100644
index 0000000000..53c2ddc58c
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/head_tilt_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/l_wheel_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/l_wheel_link_collision.stl
new file mode 100644
index 0000000000..5c1752487e
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/l_wheel_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/laser_link.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/laser_link.stl
new file mode 100644
index 0000000000..fa4882fc98
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/laser_link.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/r_wheel_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/r_wheel_link_collision.stl
new file mode 100644
index 0000000000..3742b24694
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/r_wheel_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_lift_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_lift_link_collision.stl
new file mode 100644
index 0000000000..c9aff0dda9
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_lift_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_pan_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_pan_link_collision.stl
new file mode 100644
index 0000000000..ac17a94375
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/shoulder_pan_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_fixed_link.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_fixed_link.stl
new file mode 100644
index 0000000000..7cf7fc147e
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_fixed_link.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_lift_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_lift_link_collision.stl
new file mode 100644
index 0000000000..4ce5fcf9c5
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/torso_lift_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/upperarm_roll_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/upperarm_roll_link_collision.stl
new file mode 100644
index 0000000000..120793232e
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/upperarm_roll_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_flex_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_flex_link_collision.stl
new file mode 100644
index 0000000000..3215d2e1de
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_flex_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_roll_link_collision.stl b/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_roll_link_collision.stl
new file mode 100644
index 0000000000..742bdd9197
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/fetch/wrist_roll_link_collision.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/F1.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/F1.stl
new file mode 100644
index 0000000000..515d3c9016
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/F1.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/F2.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/F2.stl
new file mode 100644
index 0000000000..7bc5e20e06
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/F2.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/F3.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/F3.stl
new file mode 100644
index 0000000000..223f06f5bf
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/F3.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/TH1_z.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH1_z.stl
new file mode 100644
index 0000000000..400ee2d625
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH1_z.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/TH2_z.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH2_z.stl
new file mode 100644
index 0000000000..5ace8388b9
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH2_z.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/TH3_z.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH3_z.stl
new file mode 100644
index 0000000000..23485abc72
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/TH3_z.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric.stl
new file mode 100644
index 0000000000..80f6f3da18
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric_cvx.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric_cvx.stl
new file mode 100644
index 0000000000..3c30f57eaa
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/forearm_electric_cvx.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/knuckle.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/knuckle.stl
new file mode 100644
index 0000000000..4faedd7540
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/knuckle.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/lfmetacarpal.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/lfmetacarpal.stl
new file mode 100644
index 0000000000..535cf4dbca
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/lfmetacarpal.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/palm.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/palm.stl
new file mode 100644
index 0000000000..65e47eb65d
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/palm.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/stls/hand/wrist.stl b/gym-grasp/gym_grasp/envs/assets/stls/hand/wrist.stl
new file mode 100644
index 0000000000..420d5f9c67
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/stls/hand/wrist.stl differ
diff --git a/gym-grasp/gym_grasp/envs/assets/textures/block.png b/gym-grasp/gym_grasp/envs/assets/textures/block.png
new file mode 100644
index 0000000000..0243b8f331
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/textures/block.png differ
diff --git a/gym-grasp/gym_grasp/envs/assets/textures/block_hidden.png b/gym-grasp/gym_grasp/envs/assets/textures/block_hidden.png
new file mode 100644
index 0000000000..e08b8613c4
Binary files /dev/null and b/gym-grasp/gym_grasp/envs/assets/textures/block_hidden.png differ
diff --git a/gym-grasp/gym_grasp/envs/fetch/__init__.py b/gym-grasp/gym_grasp/envs/fetch/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/gym-grasp/gym_grasp/envs/fetch/pick_and_place.py b/gym-grasp/gym_grasp/envs/fetch/pick_and_place.py
new file mode 100644
index 0000000000..c6c5e7ea99
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/fetch/pick_and_place.py
@@ -0,0 +1,23 @@
+import os
+from gym import utils
+from gym.envs.robotics import fetch_env
+
+
+# Relative path of this environment's XML file; it is handed to FetchEnv as the model path.
+# os.path.join is used so that the path separator is also correct on Windows.
+MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
+
+
+class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
+    """Fetch environment loaded from MODEL_XML_PATH ('fetch/pick_and_place.xml').
+
+    Configures FetchEnv with an object, a gripper that is not blocked and
+    targets that may be placed in the air.
+    """
+
+    def __init__(self, reward_type='sparse'):
+        """Create the environment.
+
+        Args:
+            reward_type: forwarded unchanged to FetchEnv ('sparse' by default).
+        """
+        initial_qpos = {
+            'robot0:slide0': 0.405,
+            'robot0:slide1': 0.48,
+            'robot0:slide2': 0.0,
+            'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
+        }
+        fetch_env.FetchEnv.__init__(
+            self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
+            gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
+            obj_range=0.15, target_range=0.15, distance_threshold=0.05,
+            initial_qpos=initial_qpos, reward_type=reward_type)
+        # EzPickle rebuilds the object from the constructor arguments it was given.
+        # Passing reward_type keeps a pickled/copied non-default environment from
+        # silently coming back with the default 'sparse' reward.
+        utils.EzPickle.__init__(self, reward_type=reward_type)
diff --git a/gym-grasp/gym_grasp/envs/fetch/push.py b/gym-grasp/gym_grasp/envs/fetch/push.py
new file mode 100644
index 0000000000..bde15ec00e
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/fetch/push.py
@@ -0,0 +1,23 @@
+import os
+from gym import utils
+from gym.envs.robotics import fetch_env
+
+
+# Relative path of this environment's XML file; it is handed to FetchEnv as the model path.
+# os.path.join is used so that the path separator is also correct on Windows.
+MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
+
+
+class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
+    """Fetch environment loaded from MODEL_XML_PATH ('fetch/push.xml').
+
+    Configures FetchEnv with an object, a blocked gripper and targets that are
+    not placed in the air.
+    """
+
+    def __init__(self, reward_type='sparse'):
+        """Create the environment.
+
+        Args:
+            reward_type: forwarded unchanged to FetchEnv ('sparse' by default).
+        """
+        initial_qpos = {
+            'robot0:slide0': 0.405,
+            'robot0:slide1': 0.48,
+            'robot0:slide2': 0.0,
+            'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
+        }
+        fetch_env.FetchEnv.__init__(
+            self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
+            gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
+            obj_range=0.15, target_range=0.15, distance_threshold=0.05,
+            initial_qpos=initial_qpos, reward_type=reward_type)
+        # EzPickle rebuilds the object from the constructor arguments it was given.
+        # Passing reward_type keeps a pickled/copied non-default environment from
+        # silently coming back with the default 'sparse' reward.
+        utils.EzPickle.__init__(self, reward_type=reward_type)
diff --git a/gym-grasp/gym_grasp/envs/fetch/reach.py b/gym-grasp/gym_grasp/envs/fetch/reach.py
new file mode 100644
index 0000000000..cc3fc46c65
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/fetch/reach.py
@@ -0,0 +1,22 @@
+import os
+from gym import utils
+from gym.envs.robotics import fetch_env
+
+
+# Relative path of this environment's XML file; it is handed to FetchEnv as the model path.
+# os.path.join is used so that the path separator is also correct on Windows.
+MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
+
+
+class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
+    """Fetch environment loaded from MODEL_XML_PATH ('fetch/reach.xml').
+
+    Configures FetchEnv without an object, with a blocked gripper and with
+    targets that may be placed in the air.
+    """
+
+    def __init__(self, reward_type='sparse'):
+        """Create the environment.
+
+        Args:
+            reward_type: forwarded unchanged to FetchEnv ('sparse' by default).
+        """
+        initial_qpos = {
+            'robot0:slide0': 0.4049,
+            'robot0:slide1': 0.48,
+            'robot0:slide2': 0.0,
+        }
+        fetch_env.FetchEnv.__init__(
+            self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
+            gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
+            obj_range=0.15, target_range=0.15, distance_threshold=0.05,
+            initial_qpos=initial_qpos, reward_type=reward_type)
+        # EzPickle rebuilds the object from the constructor arguments it was given.
+        # Passing reward_type keeps a pickled/copied non-default environment from
+        # silently coming back with the default 'sparse' reward.
+        utils.EzPickle.__init__(self, reward_type=reward_type)
diff --git a/gym-grasp/gym_grasp/envs/fetch/slide.py b/gym-grasp/gym_grasp/envs/fetch/slide.py
new file mode 100644
index 0000000000..8c893b2b7d
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/fetch/slide.py
@@ -0,0 +1,25 @@
+import os
+import numpy as np
+
+from gym import utils
+from gym.envs.robotics import fetch_env
+
+
+# Relative path of this environment's XML file; it is handed to FetchEnv as the model path.
+# os.path.join is used so that the path separator is also correct on Windows.
+MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
+
+
+class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
+    """Fetch environment loaded from MODEL_XML_PATH ('fetch/slide.xml').
+
+    Configures FetchEnv with an object, a blocked gripper, targets that are not
+    placed in the air and a target offset of (0.4, 0.0, 0.0).
+    """
+
+    def __init__(self, reward_type='sparse'):
+        """Create the environment.
+
+        Args:
+            reward_type: forwarded unchanged to FetchEnv ('sparse' by default).
+        """
+        initial_qpos = {
+            'robot0:slide0': 0.05,
+            'robot0:slide1': 0.48,
+            'robot0:slide2': 0.0,
+            'object0:joint': [1.7, 1.1, 0.4, 1., 0., 0., 0.],
+        }
+        fetch_env.FetchEnv.__init__(
+            self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
+            gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
+            obj_range=0.1, target_range=0.3, distance_threshold=0.05,
+            initial_qpos=initial_qpos, reward_type=reward_type)
+        # EzPickle rebuilds the object from the constructor arguments it was given.
+        # Passing reward_type keeps a pickled/copied non-default environment from
+        # silently coming back with the default 'sparse' reward.
+        utils.EzPickle.__init__(self, reward_type=reward_type)
diff --git a/gym-grasp/gym_grasp/envs/fetch_env.py b/gym-grasp/gym_grasp/envs/fetch_env.py
new file mode 100644
index 0000000000..4916c4bcaf
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/fetch_env.py
@@ -0,0 +1,187 @@
+import numpy as np
+
+from gym.envs.robotics import rotations, robot_env, utils
+
+
+def goal_distance(goal_a, goal_b):
+    """Return the Euclidean distance between two goals, taken along the last axis.
+
+    Both arrays must have exactly the same shape.
+    """
+    assert goal_a.shape == goal_b.shape
+    difference = goal_a - goal_b
+    return np.linalg.norm(difference, axis=-1)
+
+
+class FetchEnv(robot_env.RobotEnv):
+    """Superclass for all Fetch environments.
+
+    Stores the task configuration and implements the reward, action,
+    observation, rendering, reset and goal-sampling hooks used by the tasks.
+    """
+
+    def __init__(
+        self, model_path, n_substeps, gripper_extra_height, block_gripper,
+        has_object, target_in_the_air, target_offset, obj_range, target_range,
+        distance_threshold, initial_qpos, reward_type,
+    ):
+        """Initializes a new Fetch environment.
+
+        Args:
+            model_path (string): path to the environment's XML file
+            n_substeps (int): number of substeps the simulation runs on every call to step
+            gripper_extra_height (float): additional height above the table when positioning the gripper
+            block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable)
+            has_object (boolean): whether or not the environment has an object
+            target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface
+            target_offset (float or array with 3 elements): offset of the target
+            obj_range (float): range of a uniform distribution for sampling initial object positions
+            target_range (float): range of a uniform distribution for sampling a target
+            distance_threshold (float): the threshold after which a goal is considered achieved
+            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
+            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
+        """
+        # The configuration is stored before the base-class constructor runs.
+        self.gripper_extra_height = gripper_extra_height
+        self.block_gripper = block_gripper
+        self.has_object = has_object
+        self.target_in_the_air = target_in_the_air
+        self.target_offset = target_offset
+        self.obj_range = obj_range
+        self.target_range = target_range
+        self.distance_threshold = distance_threshold
+        self.reward_type = reward_type
+
+        super(FetchEnv, self).__init__(
+            model_path=model_path, n_substeps=n_substeps, n_actions=4,
+            initial_qpos=initial_qpos)
+
+    # GoalEnv methods
+    # ----------------------------
+
+    def compute_reward(self, achieved_goal, goal, info):
+        """Return the reward for `achieved_goal` with respect to `goal`.
+
+        With reward_type 'sparse' the reward is -1 where the distance exceeds
+        distance_threshold and 0 otherwise (as float32); for any other
+        reward_type it is the negative distance. `info` is not used.
+        """
+        # Compute distance between goal and the achieved goal.
+        d = goal_distance(achieved_goal, goal)
+        if self.reward_type == 'sparse':
+            return -(d > self.distance_threshold).astype(np.float32)
+        else:
+            return -d
+
+    # RobotEnv methods
+    # ----------------------------
+
+    def _step_callback(self):
+        """When the gripper is blocked, set both finger joints to 0 and call forward()."""
+        if self.block_gripper:
+            self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.)
+            self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.)
+            self.sim.forward()
+
+    def _set_action(self, action):
+        """Expand a 4-value action and apply it to the simulation.
+
+        The first three values are the position control (scaled by 0.05), the
+        fourth is the gripper control, which is duplicated for the two fingers
+        and zeroed when the gripper is blocked. A fixed quaternion is inserted
+        between them before the result is passed to the control/mocap helpers.
+        """
+        assert action.shape == (4,)
+        action = action.copy()  # ensure that we don't change the action outside of this scope
+        pos_ctrl, gripper_ctrl = action[:3], action[3]
+
+        pos_ctrl *= 0.05  # limit maximum change in position
+        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
+        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
+        assert gripper_ctrl.shape == (2,)
+        if self.block_gripper:
+            gripper_ctrl = np.zeros_like(gripper_ctrl)
+        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
+
+        # Apply action to simulation.
+        utils.ctrl_set_action(self.sim, action)
+        utils.mocap_set_action(self.sim, action)
+
+    def _get_obs(self):
+        """Build the observation dictionary.
+
+        Returns:
+            dict: 'observation' (concatenated gripper and, if present, object
+            state), 'achieved_goal' (object position when the environment has
+            an object, gripper position otherwise) and 'desired_goal' (a copy
+            of the current goal).
+        """
+        # positions
+        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
+        # Duration of one environment step; every velocity below is scaled by it.
+        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
+        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
+        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
+        if self.has_object:
+            object_pos = self.sim.data.get_site_xpos('object0')
+            # rotations
+            object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0'))
+            # velocities
+            object_velp = self.sim.data.get_site_xvelp('object0') * dt
+            object_velr = self.sim.data.get_site_xvelr('object0') * dt
+            # gripper state
+            object_rel_pos = object_pos - grip_pos
+            object_velp -= grip_velp
+        else:
+            # Without an object all object-related entries are empty arrays.
+            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0)
+        gripper_state = robot_qpos[-2:]
+        gripper_vel = robot_qvel[-2:] * dt  # change to a scalar if the gripper is made symmetric
+
+        if not self.has_object:
+            achieved_goal = grip_pos.copy()
+        else:
+            achieved_goal = np.squeeze(object_pos.copy())
+        obs = np.concatenate([
+            grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(),
+            object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel,
+        ])
+
+        return {
+            'observation': obs.copy(),
+            'achieved_goal': achieved_goal.copy(),
+            'desired_goal': self.goal.copy(),
+        }
+
+    def _viewer_setup(self):
+        """Point the viewer camera at the 'robot0:gripper_link' body and set its distance and angles."""
+        body_id = self.sim.model.body_name2id('robot0:gripper_link')
+        lookat = self.sim.data.body_xpos[body_id]
+        for idx, value in enumerate(lookat):
+            self.viewer.cam.lookat[idx] = value
+        self.viewer.cam.distance = 2.5
+        self.viewer.cam.azimuth = 132.
+        self.viewer.cam.elevation = -14.
+
+    def _render_callback(self):
+        """Move the 'target0' site so that it visualizes the current goal."""
+        # Visualize target.
+        sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
+        site_id = self.sim.model.site_name2id('target0')
+        # NOTE(review): the offset of site index 0 is applied to 'target0';
+        # presumably both sites share the same offset - confirm against the model.
+        self.sim.model.site_pos[site_id] = self.goal - sites_offset[0]
+        self.sim.forward()
+
+    def _reset_sim(self):
+        """Restore the initial state and, if there is an object, randomize its x/y position.
+
+        Returns:
+            bool: always True.
+        """
+        self.sim.set_state(self.initial_state)
+
+        # Randomize start position of object.
+        if self.has_object:
+            object_xpos = self.initial_gripper_xpos[:2]
+            # Resample until the object is at least 0.1 away from the gripper in x/y.
+            while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
+                object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)
+            object_qpos = self.sim.data.get_joint_qpos('object0:joint')
+            assert object_qpos.shape == (7,)
+            object_qpos[:2] = object_xpos
+            self.sim.data.set_joint_qpos('object0:joint', object_qpos)
+
+        self.sim.forward()
+        return True
+
+    def _sample_goal(self):
+        """Sample a goal position around the initial gripper position.
+
+        Each coordinate is drawn uniformly within +/- target_range. With an
+        object, target_offset is added and the height is set to height_offset,
+        optionally raised by up to 0.45 (with probability 0.5) when
+        target_in_the_air is set.
+        """
+        if self.has_object:
+            goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
+            goal += self.target_offset
+            goal[2] = self.height_offset
+            if self.target_in_the_air and self.np_random.uniform() < 0.5:
+                goal[2] += self.np_random.uniform(0, 0.45)
+        else:
+            # Honour the configured target_range here too, as in the object branch,
+            # rather than a hard-coded 0.15.
+            goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
+        return goal.copy()
+
+    def _is_success(self, achieved_goal, desired_goal):
+        """Return 1.0 (float32) where the goal distance is below distance_threshold, else 0.0."""
+        d = goal_distance(achieved_goal, desired_goal)
+        return (d < self.distance_threshold).astype(np.float32)
+
+    def _env_setup(self, initial_qpos):
+        """Apply the initial joint values, move the gripper into place and record reference positions.
+
+        Sets initial_gripper_xpos and, when the environment has an object,
+        height_offset (the z coordinate of the 'object0' site).
+        """
+        for name, value in initial_qpos.items():
+            self.sim.data.set_joint_qpos(name, value)
+        utils.reset_mocap_welds(self.sim)
+        self.sim.forward()
+
+        # Move end effector into position.
+        gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip')
+        gripper_rotation = np.array([1., 0., 1., 0.])
+        self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
+        self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
+        for _ in range(10):
+            self.sim.step()
+
+        # Extract information for sampling goals.
+        self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy()
+        if self.has_object:
+            self.height_offset = self.sim.data.get_site_xpos('object0')[2]
diff --git a/gym-grasp/gym_grasp/envs/hand/__init__.py b/gym-grasp/gym_grasp/envs/hand/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/gym-grasp/gym_grasp/envs/hand/grasp_block.py b/gym-grasp/gym_grasp/envs/hand/grasp_block.py
new file mode 100644
index 0000000000..e1798a04d0
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/hand/grasp_block.py
@@ -0,0 +1,366 @@
+import os
+import numpy as np
+import random
+
+from gym import utils, error
+# from gym.envs.robotics import rotations, hand_env
+from gym_grasp.envs import rotations, hand_env
+from gym.envs.robotics.utils import robot_get_obs
+
+try:
+ import mujoco_py
+except ImportError as e:
+ raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
+
+
+def quat_from_angle_and_axis(angle, axis):
+    """Return the unit quaternion for a rotation of `angle` about `axis`.
+
+    Args:
+        angle (float): rotation angle; half of it is passed to cos/sin.
+        axis (np.array of shape (3,)): rotation axis; it does not need to be
+            normalized and is not modified.
+
+    Returns:
+        np.array of shape (4,): [cos(angle / 2), sin(angle / 2) * unit_axis], normalized.
+    """
+    assert axis.shape == (3,)
+    # Normalize out of place: an in-place division would change the caller's
+    # array and fails for integer-typed axes.
+    unit_axis = axis / np.linalg.norm(axis)
+    quat = np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * unit_axis])
+    quat /= np.linalg.norm(quat)
+    return quat
+
+
+# Relative paths of the hand XML files. os.path.join is used so that the path
+# separator is also correct on Windows.
+MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block.xml')
+MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg.xml')
+MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen.xml')
+GRASP_BLOCK_XML = os.path.join('hand', 'grasp_block.xml')
+
+
+class ManipulateEnv(hand_env.HandEnv, utils.EzPickle):
+    def __init__(
+        self, model_path, target_position, target_rotation,
+        target_position_range, reward_type, initial_qpos=None,
+        randomize_initial_position=True, randomize_initial_rotation=True, randomize_object=True,
+        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
+        ignore_z_target_rotation=False,
+        target_id=0, num_axis=5, reward_lambda=1.
+    ):
+        """Initializes a new Hand manipulation environment.
+
+        Args:
+            model_path (string): path to the environment's XML file
+            target_position (string): the type of target position:
+                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
+                - fixed: target position is set to the initial position of the object
+                - random: target position is fully randomized according to target_position_range
+            target_rotation (string): the type of target rotation:
+                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
+                - fixed: target rotation is set to the initial rotation of the object
+                - xyz: fully randomized target rotation around the X, Y and Z axis
+                - z: fully randomized target rotation around the Z axis
+                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
+            ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
+            target_position_range (np.array of shape (3, 2)): range of the target_position randomization
+            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
+            initial_qpos (dict): a dictionary of joint names and values that define the initial
+                configuration; None (the default) is treated as an empty dictionary
+            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
+            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
+            randomize_object (boolean): whether the object joint is picked at random from object_list
+                (here and on every reset) instead of being selected by target_id
+            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
+            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
+            n_substeps (int): number of substeps the simulation runs on every call to step
+            relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
+            target_id (int): index into object_list of the object used when randomize_object is False
+            num_axis (int): the number of components, i.e. how many leading entries of a
+                variance-ratio vector are summed in the non-sparse reward
+            reward_lambda (float): a weight for the second term of the reward function
+        """
+        # A fresh dictionary per instance instead of one shared mutable default.
+        if initial_qpos is None:
+            initial_qpos = {}
+
+        self.target_position = target_position
+        self.target_rotation = target_rotation
+        self.target_position_range = target_position_range
+        self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
+        self.randomize_initial_rotation = randomize_initial_rotation
+        self.randomize_initial_position = randomize_initial_position
+        self.distance_threshold = distance_threshold
+        self.rotation_threshold = rotation_threshold
+        self.reward_type = reward_type
+        self.ignore_z_target_rotation = ignore_z_target_rotation
+
+        # Filled from outside this method; compute_reward consumes the last entry.
+        self.variance_ratio = []
+
+        # Joint names of the objects that can be selected.
+        self.object_list = ["box:joint", "apple:joint", "banana:joint", "beerbottle:joint", "book:joint",
+                            "needle:joint", "pen:joint", "teacup:joint"]
+        self.target_id = target_id
+        self.num_axis = num_axis  # the number of components
+        self.randomize_object = randomize_object  # random target (boolean)
+        self.reward_lambda = reward_lambda  # a weight for the second term of the reward function (float)
+
+        if self.randomize_object:
+            # In case of randomly selected target. The range follows the list length,
+            # so adding or removing objects needs no further change here.
+            self.object = self.object_list[random.randrange(len(self.object_list))]
+        else:
+            self.object = self.object_list[self.target_id]  # target
+
+        # Start pose written to the selected object's joint on reset:
+        # three position values followed by a four-value quaternion.
+        self.init_object_qpos = np.array([1, 0.87, 0.2, 1, 0, 0, 0])
+
+        assert self.target_position in ['ignore', 'fixed', 'random']
+        assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']
+
+        hand_env.HandEnv.__init__(
+            self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
+            relative_control=relative_control)
+        # NOTE(review): no constructor arguments are recorded here, so pickling
+        # presumably relies on subclasses calling EzPickle.__init__ themselves - confirm.
+        utils.EzPickle.__init__(self)
+
+    def _get_achieved_goal(self):
+        """Return the qpos of the currently selected object's joint (asserted to have 7 entries)."""
+        # Object position and rotation.
+        pose = self.sim.data.get_joint_qpos(self.object)
+        assert pose.shape == (7,)
+        return pose
+
+ # def _randamize_target(self):
+ # self.sim.data.set_joint_qpos("target0:joint", [1, 0.87, 0.4, 1, 0, 0, 0])
+ # # print("##### {} #####".format(self.sim.data.get_joint_qpos("target0:joint")))
+
+    def _goal_distance(self, goal_a, goal_b):
+        """Return the positional and rotational distance between two goals.
+
+        Args:
+            goal_a, goal_b: arrays of identical shape whose last axis has 7
+                entries (3 position values followed by a 4-value quaternion).
+
+        Returns:
+            tuple: (d_pos, d_rot). d_pos is the Euclidean distance of the
+            positions, or zeros when target_position is 'ignore'; d_rot is the
+            angle between the rotations, or zeros when target_rotation is 'ignore'.
+        """
+        assert goal_a.shape == goal_b.shape
+        assert goal_a.shape[-1] == 7
+
+        d_pos = np.zeros_like(goal_a[..., 0])
+        d_rot = np.zeros_like(goal_b[..., 0])
+        if self.target_position != 'ignore':
+            delta_pos = goal_a[..., :3] - goal_b[..., :3]
+            d_pos = np.linalg.norm(delta_pos, axis=-1)
+
+        if self.target_rotation != 'ignore':
+            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
+
+            if self.ignore_z_target_rotation:
+                # Special case: We want to ignore the Z component of the rotation.
+                # This code here assumes Euler angles with xyz convention. We first transform
+                # to euler, then set the Z component to be equal between the two, and finally
+                # transform back into quaternions.
+                euler_a = rotations.quat2euler(quat_a)
+                euler_b = rotations.quat2euler(quat_b)
+                # Index the last axis so that batched goals get their Z component
+                # replaced as well (a plain [2] would address the third goal instead).
+                euler_a[..., 2] = euler_b[..., 2]
+                quat_a = rotations.euler2quat(euler_a)
+
+            # Subtract quaternions and extract angle between them.
+            quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
+            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
+            d_rot = angle_diff
+        assert d_pos.shape == d_rot.shape
+        return d_pos, d_rot
+
+ # GoalEnv methods
+ # ----------------------------
+
+    def compute_reward(self, achieved_goal, goal, info):
+        """Return the reward for `achieved_goal` with respect to `goal`.
+
+        Both reward types start from ``success - 1`` (0 on success, -1
+        otherwise). For any reward type other than 'sparse', a non-empty
+        ``self.variance_ratio`` is consumed: the sum of the first ``num_axis``
+        values of its last entry gives an extra penalty of
+        ``reward_lambda * (1 - sum)`` and the list is reset to empty.
+        `info` is not used.
+        """
+        if self.reward_type == 'sparse':
+            success = self._is_success(achieved_goal, goal).astype(np.float32)
+            return (success - 1.)
+
+        # -- nishimura
+        reward = self._is_success(achieved_goal, goal) - 1.  # default
+        # --
+
+        # -- reward: contribution-ratio penalty
+        # The penalty stays under this guard so that the summed ratio is only
+        # read when an entry is actually available.
+        if len(self.variance_ratio) > 0:
+            latest_ratio = self.variance_ratio[-1]
+            ratio_sum = np.sum(latest_ratio[:self.num_axis])
+            self.variance_ratio = []
+
+            reward -= self.reward_lambda * (1. - ratio_sum)  # nishimura
+        # --
+
+        return reward
+
+ # RobotEnv methods
+ # ----------------------------
+
+    def _is_success(self, achieved_goal, desired_goal):
+        """Return 1.0 (float32) where both distances are below their thresholds, else 0.0.
+
+        The position distance is compared with distance_threshold and the
+        rotation distance with rotation_threshold.
+        """
+        pos_error, rot_error = self._goal_distance(achieved_goal, desired_goal)
+        pos_reached = (pos_error < self.distance_threshold).astype(np.float32)
+        rot_reached = (rot_error < self.rotation_threshold).astype(np.float32)
+        return pos_reached * rot_reached
+
+    def _env_setup(self, initial_qpos):
+        """Write every joint value of `initial_qpos` into the simulation, then call forward()."""
+        for joint_name in initial_qpos:
+            joint_value = initial_qpos[joint_name]
+            self.sim.data.set_joint_qpos(joint_name, joint_value)
+        self.sim.forward()
+
+    def _reset_sim(self):
+        """Reset the simulation, (re)select the object and place it at its start pose.
+
+        The start pose is init_object_qpos, optionally with a random rotation
+        offset (depending on target_rotation) and position noise.
+
+        Returns:
+            bool: False if stepping the simulation raised MujocoException,
+            otherwise whether the 'object:center' site ends up above z = 0.04.
+        """
+        self.sim.set_state(self.initial_state)
+        self.sim.forward()
+
+        # -- motoda
+        if self.randomize_object:
+            # In case of randomly selected target; the range follows the list length.
+            self.object = self.object_list[random.randrange(len(self.object_list))]
+        else:
+            self.object = self.object_list[self.target_id]  # target
+        # --
+        # Work on a copy: the slices taken below are modified in place, which
+        # would otherwise accumulate noise in self.init_object_qpos across resets.
+        initial_qpos = self.init_object_qpos.copy()
+        initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
+        assert initial_qpos.shape == (7,)
+        assert initial_pos.shape == (3,)
+        assert initial_quat.shape == (4,)
+        initial_qpos = None
+
+        # Randomization initial rotation.
+        if self.randomize_initial_rotation:
+            if self.target_rotation == 'z':
+                angle = self.np_random.uniform(-np.pi, np.pi)
+                axis = np.array([0., 0., 1.])
+                offset_quat = quat_from_angle_and_axis(angle, axis)
+                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
+            elif self.target_rotation == 'parallel':
+                angle = self.np_random.uniform(-np.pi, np.pi)
+                axis = np.array([0., 0., 1.])
+                z_quat = quat_from_angle_and_axis(angle, axis)
+                parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
+                offset_quat = rotations.quat_mul(z_quat, parallel_quat)
+                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
+            elif self.target_rotation in ['xyz', 'ignore']:
+                angle = self.np_random.uniform(-np.pi, np.pi)
+                axis = self.np_random.uniform(-1., 1., size=3)
+                offset_quat = quat_from_angle_and_axis(angle, axis)
+                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
+            elif self.target_rotation == 'fixed':
+                pass
+            else:
+                raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
+
+        # Randomize initial position.
+        if self.randomize_initial_position:
+            if self.target_position != 'fixed':
+                initial_pos += self.np_random.normal(size=3, scale=0.005)
+
+        initial_quat /= np.linalg.norm(initial_quat)
+        initial_qpos = np.concatenate([initial_pos, initial_quat])
+        self.sim.data.set_joint_qpos(self.object, initial_qpos)
+
+        def is_on_palm():
+            # NOTE(review): always checks the 'object:center' site, whichever
+            # object joint was selected above - confirm this is intended.
+            self.sim.forward()
+            cube_middle_idx = self.sim.model.site_name2id('object:center')
+            cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
+            return cube_middle_pos[2] > 0.04
+
+        # Run the simulation for a bunch of timesteps to let everything settle in.
+        for _ in range(10):
+            self._set_action(np.zeros(21))
+            try:
+                self.sim.step()
+            except mujoco_py.MujocoException:
+                return False
+        return is_on_palm()
+
+    def _sample_goal(self):
+        """Sample a goal pose for the currently selected object.
+
+        Returns:
+            np.array of shape (7,): the target position (3 values) followed by
+            the normalized target quaternion (4 values).
+
+        Raises:
+            error.Error: if target_position or target_rotation is not a known option.
+        """
+        # Select a goal for the object position.
+        target_pos = None
+        if self.target_position == 'random':
+            assert self.target_position_range.shape == (3, 2)
+            offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1])
+            assert offset.shape == (3,)
+            target_pos = self.sim.data.get_joint_qpos(self.object)[:3] + offset
+        elif self.target_position in ['ignore', 'fixed']:
+            target_pos = self.sim.data.get_joint_qpos(self.object)[:3]
+        else:
+            raise error.Error('Unknown target_position option "{}".'.format(self.target_position))
+        assert target_pos is not None
+        assert target_pos.shape == (3,)
+
+        # Select a goal for the object rotation.
+        target_quat = None
+        if self.target_rotation == 'z':
+            angle = self.np_random.uniform(-np.pi, np.pi)
+            axis = np.array([0., 0., 1.])
+            target_quat = quat_from_angle_and_axis(angle, axis)
+        elif self.target_rotation == 'parallel':
+            angle = self.np_random.uniform(-np.pi, np.pi)
+            axis = np.array([0., 0., 1.])
+            target_quat = quat_from_angle_and_axis(angle, axis)
+            parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
+            target_quat = rotations.quat_mul(target_quat, parallel_quat)
+        elif self.target_rotation == 'xyz':
+            angle = self.np_random.uniform(-np.pi, np.pi)
+            axis = self.np_random.uniform(-1., 1., size=3)
+            target_quat = quat_from_angle_and_axis(angle, axis)
+        elif self.target_rotation in ['ignore', 'fixed']:
+            # Only the last four of the seven qpos values are the rotation. Copy
+            # them so that the in-place normalization below cannot write into the
+            # array returned by the simulation.
+            target_quat = self.sim.data.get_joint_qpos(self.object)[3:].copy()
+        else:
+            raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
+        assert target_quat is not None
+        assert target_quat.shape == (4,)
+
+        target_quat /= np.linalg.norm(target_quat)  # normalized quaternion
+        goal = np.concatenate([target_pos, target_quat])
+        return goal
+
def _render_callback(self):
    """Place the 'target:joint' body at the current goal pose before rendering.

    When the target position is ignored, the target is shifted by 0.15 along
    the first coordinate so that it does not obscure the actual object. If the
    model has a geom named 'object_hidden', its alpha channel is set to 1.
    """
    target_qpos = self.goal.copy()
    assert target_qpos.shape == (7,)
    if self.target_position == 'ignore':
        # Position is irrelevant in this mode, so show the target off to the side.
        target_qpos[0] += 0.15

    sim = self.sim
    sim.data.set_joint_qpos('target:joint', target_qpos)
    sim.data.set_joint_qvel('target:joint', np.zeros(6))

    model = sim.model
    if 'object_hidden' in model.geom_names:
        model.geom_rgba[model.geom_name2id('object_hidden'), 3] = 1.
    sim.forward()
+
def _get_obs(self):
    """Build the goal-environment observation dictionary.

    Returns:
        dict with three array copies:
          - 'observation': robot qpos, robot qvel, object qvel and the
            achieved goal, concatenated in that order
          - 'achieved_goal': flattened result of ``_get_achieved_goal``
            (object position + rotation)
          - 'desired_goal': flattened ``self.goal``
    """
    hand_qpos, hand_qvel = robot_get_obs(self.sim)
    obj_velocity = self.sim.data.get_joint_qvel(self.object)
    achieved = self._get_achieved_goal().ravel()
    features = np.concatenate([hand_qpos, hand_qvel, obj_velocity, achieved])

    obs = {}
    obs['observation'] = features.copy()
    obs['achieved_goal'] = achieved.copy()
    obs['desired_goal'] = self.goal.ravel().copy()
    return obs
+
+
class HandBlockEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_BLOCK_XML model."""

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandBlockEnv, self).__init__(
            model_path=MANIPULATE_BLOCK_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
+
+
class HandEggEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_EGG_XML model."""

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandEggEnv, self).__init__(
            model_path=MANIPULATE_EGG_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
+
+
class HandPenEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_PEN_XML model.

    Compared with the block and egg variants, the initial rotation is not
    randomized, the Z component of the target rotation is ignored and the
    distance threshold is 0.05.
    """

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandPenEnv, self).__init__(
            model_path=MANIPULATE_PEN_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            randomize_initial_rotation=False,
            reward_type=reward_type,
            ignore_z_target_rotation=True,
            distance_threshold=0.05,
        )
+
+
class GraspBlockEnv(ManipulateEnv):
    """ManipulateEnv configured with the GRASP_BLOCK_XML model.

    The initial object position is not randomized, object randomization is
    switched off (``randomize_object=False``, ``target_id=0``) and ``num_axis``
    is 5. The rotation threshold of 100.0 is far larger than any angle
    difference, presumably so that only the position decides success --
    TODO confirm against the base class.
    """

    def __init__(self, target_position='random', target_rotation='xyz', reward_type=None):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.025, 0.025), (-0.025, 0.025), (0.2, 0.25)])
        super(GraspBlockEnv, self).__init__(
            model_path=GRASP_BLOCK_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            randomize_initial_position=False,
            reward_type=reward_type,
            distance_threshold=0.05,
            rotation_threshold=100.0,
            randomize_object=False,
            target_id=0,
            num_axis=5,
        )
+'''
+Object_list:
+ self.object_list = ["box:joint", "apple:joint", "banana:joint", "beerbottle:joint", "book:joint",
+ "needle:joint", "pen:joint", "teacup:joint"]
+'''
diff --git a/gym-grasp/gym_grasp/envs/hand/grasp_env.py b/gym-grasp/gym_grasp/envs/hand/grasp_env.py
new file mode 100644
index 0000000000..89823864b2
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/hand/grasp_env.py
@@ -0,0 +1,27 @@
#!/usr/bin/env python3
"""
Loads the grasp_block hand model and renders it in an MjViewer while
overwriting the first two qpos entries with a sinusoidal trajectory.

The loop below never terminates on its own; stop the script manually.
"""
from mujoco_py import load_model_from_path, MjSim, MjViewer
import math
import os

# NOTE: the model path is relative to the current working directory, so the
# script has to be started from the directory that contains this file.
model = load_model_from_path("../assets/hand/grasp_block.xml")
sim = MjSim(model)

viewer = MjViewer(sim)

# Frame counter that drives the trajectory below.
t = 0

while True:
    viewer.render()
    t += 1
    sim.step()
    state = sim.get_state()

    # Overwrite qpos[0] and qpos[1] after every step so they follow an ellipse
    # (amplitudes 0.05 and 0.1). Which joints these entries belong to depends
    # on the model file -- TODO confirm.
    state.qpos[1] = 0.1*math.sin(0.01*t)
    state.qpos[0] = 0.05*math.cos(0.01*t)

    sim.set_state(state)
    # if t > 100 and os.getenv('TESTING') is not None:
    #     break
diff --git a/gym-grasp/gym_grasp/envs/hand/manipulate.py b/gym-grasp/gym_grasp/envs/hand/manipulate.py
new file mode 100644
index 0000000000..de55f34827
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/hand/manipulate.py
@@ -0,0 +1,299 @@
+import os
+import numpy as np
+
+from gym import utils, error
+from gym.envs.robotics import rotations, hand_env
+from gym.envs.robotics.utils import robot_get_obs
+
+try:
+ import mujoco_py
+except ImportError as e:
+ raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
+
+
def quat_from_angle_and_axis(angle, axis):
    """Return the unit quaternion for a rotation of `angle` about `axis`.

    Args:
        angle (float): rotation angle in radians.
        axis (np.ndarray of shape (3,)): rotation axis; it does not need to be
            normalized and is left unmodified.

    Returns:
        np.ndarray of shape (4,): normalized quaternion with the scalar part
        first, i.e. ``[cos(angle / 2), sin(angle / 2) * unit_axis]``.
    """
    assert axis.shape == (3,)
    # Normalize into a new array: an in-place division would silently change the
    # caller's array and raises a casting error for integer-typed axes.
    unit_axis = axis / np.linalg.norm(axis)
    quat = np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * unit_axis])
    quat /= np.linalg.norm(quat)
    return quat
+
+
# Relative paths of the model XML files used by the environments below. They are
# built with os.path.join so that the path separator is also correct on Windows.
MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block.xml')
MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg.xml')
MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen.xml')
+
+
class ManipulateEnv(hand_env.HandEnv, utils.EzPickle):
    """Goal-based environment in which a hand has to bring an object to a target pose.

    Goals and achieved goals are vectors whose last axis has 7 entries: the
    object position (3) followed by the object rotation as a quaternion (4).
    """

    def __init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type, initial_qpos=None,
        randomize_initial_position=True, randomize_initial_rotation=True,
        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
        ignore_z_target_rotation=False,
    ):
        """Initializes a new Hand manipulation environment.

        Args:
            model_path (string): path to the environments XML file
            target_position (string): the type of target position:
                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
                - fixed: target position is set to the initial position of the object
                - random: target position is fully randomized according to target_position_range
            target_rotation (string): the type of target rotation:
                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
                - fixed: target rotation is set to the initial rotation of the object
                - xyz: fully randomized target rotation around the X, Y and Z axis
                - z: fully randomized target rotation around the Z axis
                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
            ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
            target_position_range (np.array of shape (3, 2)): range of the target_position randomization
            reward_type (string): 'sparse' selects the sparse reward; any other value selects the dense reward
            initial_qpos (dict or None): a dictionary of joint names and values that define the initial
                configuration; None (the default) means an empty dictionary
            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
            n_substeps (int): number of substeps the simulation runs on every call to step
            relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
        """
        # Use a fresh dictionary per instance instead of one shared mutable default.
        if initial_qpos is None:
            initial_qpos = {}

        self.target_position = target_position
        self.target_rotation = target_rotation
        self.target_position_range = target_position_range
        self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
        self.randomize_initial_rotation = randomize_initial_rotation
        self.randomize_initial_position = randomize_initial_position
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.reward_type = reward_type
        self.ignore_z_target_rotation = ignore_z_target_rotation

        assert self.target_position in ['ignore', 'fixed', 'random']
        assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']

        # The attributes above must be set first: the base initializer is handed the
        # model and initial configuration and is expected to call back into methods
        # of this class that read them.
        hand_env.HandEnv.__init__(
            self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
            relative_control=relative_control)
        utils.EzPickle.__init__(self)

    def _get_achieved_goal(self):
        """Return the 7-element qpos (position + rotation) of 'object:joint'."""
        object_qpos = self.sim.data.get_joint_qpos('object:joint')
        assert object_qpos.shape == (7,)
        return object_qpos

    def _goal_distance(self, goal_a, goal_b):
        """Compute positional and rotational distance between two (batches of) goals.

        Args:
            goal_a, goal_b (np.ndarray): arrays of identical shape whose last axis has 7 entries.

        Returns:
            tuple (d_pos, d_rot): Euclidean distance between the positions and angle
            between the rotations. A component is zero when the respective target
            type is 'ignore'.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                # Index the last axis so that batched goals are handled as well; a plain
                # `[2]` would pick the third batch entry instead of the Z angle.
                euler_a[..., 2] = euler_b[..., 2]
                quat_a = rotations.euler2quat(euler_a)

            # Subtract quaternions and extract angle between them.
            quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot

    # GoalEnv methods
    # ----------------------------

    def compute_reward(self, achieved_goal, goal, info):
        """Return the reward for `achieved_goal` given the desired `goal`.

        Sparse reward: 0 on success and -1 otherwise. Dense reward (any other
        `reward_type`): negative weighted sum of position and rotation distance.
        `info` is not used.
        """
        if self.reward_type == 'sparse':
            success = self._is_success(achieved_goal, goal).astype(np.float32)
            return (success - 1.)
        else:
            d_pos, d_rot = self._goal_distance(achieved_goal, goal)
            # We weigh the difference in position to avoid that `d_pos` (in meters) is completely
            # dominated by `d_rot` (in radians).
            return -(10. * d_pos + d_rot)

    # RobotEnv methods
    # ----------------------------

    def _is_success(self, achieved_goal, desired_goal):
        """Return 1.0 (float32) where both distance thresholds are met, else 0.0."""
        d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal)
        achieved_pos = (d_pos < self.distance_threshold).astype(np.float32)
        achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32)
        achieved_both = achieved_pos * achieved_rot
        return achieved_both

    def _env_setup(self, initial_qpos):
        """Apply the given joint positions to the simulation and recompute its state."""
        for name, value in initial_qpos.items():
            self.sim.data.set_joint_qpos(name, value)
        self.sim.forward()

    def _reset_sim(self):
        """Restore the initial state and randomize the initial pose of the object.

        Returns:
            bool: False if the simulator raised while settling; otherwise whether the
            'object:center' site is higher than 0.04 after settling, which is used as
            the test for the object still resting on the palm.
        """
        self.sim.set_state(self.initial_state)
        self.sim.forward()

        initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy()
        initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
        assert initial_qpos.shape == (7,)
        assert initial_pos.shape == (3,)
        assert initial_quat.shape == (4,)

        # Randomization initial rotation.
        if self.randomize_initial_rotation:
            if self.target_rotation == 'z':
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0., 0., 1.])
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'parallel':
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0., 0., 1.])
                z_quat = quat_from_angle_and_axis(angle, axis)
                parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
                offset_quat = rotations.quat_mul(z_quat, parallel_quat)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation in ['xyz', 'ignore']:
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = self.np_random.uniform(-1., 1., size=3)
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'fixed':
                pass
            else:
                raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))

        # Randomize initial position.
        if self.randomize_initial_position:
            if self.target_position != 'fixed':
                initial_pos += self.np_random.normal(size=3, scale=0.005)

        initial_quat /= np.linalg.norm(initial_quat)
        initial_qpos = np.concatenate([initial_pos, initial_quat])
        self.sim.data.set_joint_qpos('object:joint', initial_qpos)

        def is_on_palm():
            self.sim.forward()
            cube_middle_idx = self.sim.model.site_name2id('object:center')
            cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
            return cube_middle_pos[2] > 0.04

        # Run the simulation for a bunch of timesteps to let everything settle in.
        for _ in range(10):
            self._set_action(np.zeros(20))
            try:
                self.sim.step()
            except mujoco_py.MujocoException:
                # Numerical trouble in the simulator: report the reset as failed.
                return False
        return is_on_palm()

    def _sample_goal(self):
        """Sample a new goal: target position (3) followed by a unit quaternion (4).

        Returns:
            np.ndarray: goal of shape (7,).

        Raises:
            error.Error: if `target_position` or `target_rotation` is not a known option.
        """
        # Select a goal for the object position.
        target_pos = None
        if self.target_position == 'random':
            assert self.target_position_range.shape == (3, 2)
            offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1])
            assert offset.shape == (3,)
            target_pos = self.sim.data.get_joint_qpos('object:joint')[:3] + offset
        elif self.target_position in ['ignore', 'fixed']:
            target_pos = self.sim.data.get_joint_qpos('object:joint')[:3]
        else:
            raise error.Error('Unknown target_position option "{}".'.format(self.target_position))
        assert target_pos is not None
        assert target_pos.shape == (3,)

        # Select a goal for the object rotation.
        target_quat = None
        if self.target_rotation == 'z':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation == 'parallel':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
            parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
            target_quat = rotations.quat_mul(target_quat, parallel_quat)
        elif self.target_rotation == 'xyz':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = self.np_random.uniform(-1., 1., size=3)
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation in ['ignore', 'fixed']:
            # Only the quaternion part (last 4 of the 7 qpos values) belongs here;
            # taking the whole qpos made the shape assert below fail. Copy it so the
            # in-place normalization cannot write back into the simulator state
            # should the getter return a view.
            target_quat = self.sim.data.get_joint_qpos('object:joint')[3:].copy()
        else:
            raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
        assert target_quat is not None
        assert target_quat.shape == (4,)

        target_quat /= np.linalg.norm(target_quat)  # normalized quaternion
        goal = np.concatenate([target_pos, target_quat])
        return goal

    def _render_callback(self):
        """Move the 'target:joint' body to the goal pose before a frame is rendered."""
        # Assign current state to target object but offset a bit so that the actual object
        # is not obscured.
        goal = self.goal.copy()
        assert goal.shape == (7,)
        if self.target_position == 'ignore':
            # Move the object to the side since we do not care about its position.
            goal[0] += 0.15
        self.sim.data.set_joint_qpos('target:joint', goal)
        self.sim.data.set_joint_qvel('target:joint', np.zeros(6))

        if 'object_hidden' in self.sim.model.geom_names:
            hidden_id = self.sim.model.geom_name2id('object_hidden')
            self.sim.model.geom_rgba[hidden_id, 3] = 1.
        self.sim.forward()

    def _get_obs(self):
        """Return the observation dictionary ('observation', 'achieved_goal', 'desired_goal')."""
        robot_qpos, robot_qvel = robot_get_obs(self.sim)
        object_qvel = self.sim.data.get_joint_qvel('object:joint')
        achieved_goal = self._get_achieved_goal().ravel()  # this contains the object position + rotation
        observation = np.concatenate([robot_qpos, robot_qvel, object_qvel, achieved_goal])
        return {
            'observation': observation.copy(),
            'achieved_goal': achieved_goal.copy(),
            'desired_goal': self.goal.ravel().copy(),
        }
+
+
class HandBlockEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_BLOCK_XML model."""

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandBlockEnv, self).__init__(
            model_path=MANIPULATE_BLOCK_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
+
+
class HandEggEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_EGG_XML model."""

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandEggEnv, self).__init__(
            model_path=MANIPULATE_EGG_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
+
+
class HandPenEnv(ManipulateEnv):
    """ManipulateEnv configured with the MANIPULATE_PEN_XML model.

    Compared with the block and egg variants, the initial rotation is not
    randomized, the Z component of the target rotation is ignored and the
    distance threshold is 0.05.
    """

    def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
        # One (low, high) offset range per coordinate of the sampled target position.
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        super(HandPenEnv, self).__init__(
            model_path=MANIPULATE_PEN_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            randomize_initial_rotation=False,
            reward_type=reward_type,
            ignore_z_target_rotation=True,
            distance_threshold=0.05,
        )
diff --git a/gym-grasp/gym_grasp/envs/hand/reach.py b/gym-grasp/gym_grasp/envs/hand/reach.py
new file mode 100644
index 0000000000..81ed9f9540
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/hand/reach.py
@@ -0,0 +1,149 @@
+import os
+import numpy as np
+
+from gym import utils
+from gym.envs.robotics import hand_env
+from gym.envs.robotics.utils import robot_get_obs
+
+
# Names of the fingertip sites. Their order fixes the layout of every goal vector in
# this module: entry i of this list owns elements 3*i .. 3*i+2 of the flattened goal.
# 'robot0:S_thtip' is the thumb tip.
FINGERTIP_SITE_NAMES = [
    'robot0:S_fftip',
    'robot0:S_mftip',
    'robot0:S_rftip',
    'robot0:S_lftip',
    'robot0:S_thtip',
]


# Default initial configuration: joint name -> joint position. It is the default
# `initial_qpos` of HandReachEnv, whose `_env_setup` writes each entry into the
# simulation.
DEFAULT_INITIAL_QPOS = {
    'robot0:WRJ1': -0.16514339750464327,
    'robot0:WRJ0': -0.31973286565062153,
    'robot0:FFJ3': 0.14340512546557435,
    'robot0:FFJ2': 0.32028208333591573,
    'robot0:FFJ1': 0.7126053607727917,
    'robot0:FFJ0': 0.6705281001412586,
    'robot0:MFJ3': 0.000246444303701037,
    'robot0:MFJ2': 0.3152655251085491,
    'robot0:MFJ1': 0.7659800313729842,
    'robot0:MFJ0': 0.7323156897425923,
    'robot0:RFJ3': 0.00038520700007378114,
    'robot0:RFJ2': 0.36743546201985233,
    'robot0:RFJ1': 0.7119514095008576,
    'robot0:RFJ0': 0.6699446327514138,
    'robot0:LFJ4': 0.0525442258033891,
    'robot0:LFJ3': -0.13615534724474673,
    'robot0:LFJ2': 0.39872030433433003,
    'robot0:LFJ1': 0.7415570009679252,
    'robot0:LFJ0': 0.704096378652974,
    'robot0:THJ4': 0.003673823825070126,
    'robot0:THJ3': 0.5506291436028695,
    'robot0:THJ2': -0.014515151997119306,
    'robot0:THJ1': -0.0015229223564485414,
    'robot0:THJ0': -0.7894883021600622,
}


# Relative path of the model XML file, built with os.path.join so that the path
# separator is also correct on Windows.
MODEL_XML_PATH = os.path.join('hand', 'reach.xml')
+
+
def goal_distance(goal_a, goal_b):
    """Return the Euclidean distance between two goals along the last axis.

    Both arrays must have the same shape; batches are supported and yield one
    distance per leading index.
    """
    assert goal_a.shape == goal_b.shape
    difference = goal_a - goal_b
    return np.linalg.norm(difference, axis=-1)
+
+
class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
    """Hand environment whose goal is a set of fingertip positions.

    Goals are flat vectors holding one xyz triple per entry of
    FINGERTIP_SITE_NAMES, in that order.
    """

    def __init__(
        self, distance_threshold=0.01, n_substeps=20, relative_control=False,
        initial_qpos=DEFAULT_INITIAL_QPOS, reward_type='sparse',
    ):
        """Store the reward settings, then initialize the hand simulation.

        Args:
            distance_threshold (float): distance below which a goal counts as reached
            n_substeps (int): forwarded to the base hand environment
            relative_control (boolean): forwarded to the base hand environment
            initial_qpos (dict): joint names and positions applied in `_env_setup`
            reward_type (string): 'sparse' for a 0/-1 reward, anything else for
                the negative distance
        """
        self.reward_type = reward_type
        self.distance_threshold = distance_threshold

        hand_env.HandEnv.__init__(
            self,
            MODEL_XML_PATH,
            n_substeps=n_substeps,
            initial_qpos=initial_qpos,
            relative_control=relative_control,
        )
        utils.EzPickle.__init__(self)

    def _get_achieved_goal(self):
        """Return the current positions of all fingertip sites as one flat array."""
        positions = []
        for site_name in FINGERTIP_SITE_NAMES:
            positions.append(self.sim.data.get_site_xpos(site_name))
        return np.array(positions).flatten()

    # GoalEnv methods
    # ----------------------------

    def compute_reward(self, achieved_goal, goal, info):
        """Return the sparse (0 / -1) or dense (negative distance) reward; `info` is unused."""
        distance = goal_distance(achieved_goal, goal)
        if self.reward_type != 'sparse':
            return -distance
        return -(distance > self.distance_threshold).astype(np.float32)

    # RobotEnv methods
    # ----------------------------

    def _env_setup(self, initial_qpos):
        """Apply the initial joint positions and remember the resulting fingertip and palm positions."""
        for joint_name, joint_value in initial_qpos.items():
            self.sim.data.set_joint_qpos(joint_name, joint_value)
        self.sim.forward()

        self.initial_goal = self._get_achieved_goal().copy()
        palm_id = self.sim.model.body_name2id('robot0:palm')
        self.palm_xpos = self.sim.data.body_xpos[palm_id].copy()

    def _get_obs(self):
        """Return the observation dictionary ('observation', 'achieved_goal', 'desired_goal')."""
        hand_qpos, hand_qvel = robot_get_obs(self.sim)
        fingertips = self._get_achieved_goal().ravel()
        features = np.concatenate([hand_qpos, hand_qvel, fingertips])

        obs = {}
        obs['observation'] = features.copy()
        obs['achieved_goal'] = fingertips.copy()
        obs['desired_goal'] = self.goal.copy()
        return obs

    def _sample_goal(self):
        """Sample a goal in which the thumb and one randomly chosen finger meet above the hand.

        With probability 0.1 the initial fingertip positions are returned instead.
        """
        thumb = 'robot0:S_thtip'
        candidates = [site for site in FINGERTIP_SITE_NAMES if site != thumb]
        chosen = self.np_random.choice(candidates)

        thumb_idx = FINGERTIP_SITE_NAMES.index(thumb)
        finger_idx = FINGERTIP_SITE_NAMES.index(chosen)
        assert thumb_idx != finger_idx

        # The meeting point is a fixed offset from the palm plus a little noise.
        meeting_pos = self.palm_xpos + np.array([0.0, -0.09, 0.05])
        meeting_pos += self.np_random.normal(scale=0.005, size=meeting_pos.shape)

        # Keep each of the two targets 0.005 short of the meeting point, on the side
        # of the respective fingertip, so that the two targets do not overlap.
        goal = self.initial_goal.copy().reshape(-1, 3)
        for tip_idx in (thumb_idx, finger_idx):
            direction = meeting_pos - goal[tip_idx]
            direction /= np.linalg.norm(direction)
            goal[tip_idx] = meeting_pos - 0.005 * direction

        # Occasionally ask all fingers to return to their initial positions; this
        # avoids that the thumb constantly stays near the goal position already.
        if self.np_random.uniform() < 0.1:
            goal = self.initial_goal.copy()
        return goal.flatten()

    def _is_success(self, achieved_goal, desired_goal):
        """Return 1.0 (float32) where the goal distance is below the threshold, else 0.0."""
        distance = goal_distance(achieved_goal, desired_goal)
        return (distance < self.distance_threshold).astype(np.float32)

    def _render_callback(self):
        """Move the 'target{i}' and 'finger{i}' marker sites to the goal and current fingertip positions."""
        sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()

        def place_markers(name_format, points):
            # Write one marker site per fingertip, compensating for the site offset.
            for marker_idx in range(5):
                site_id = self.sim.model.site_name2id(name_format.format(marker_idx))
                self.sim.model.site_pos[site_id] = points[marker_idx] - sites_offset[site_id]

        # Visualize targets.
        place_markers('target{}', self.goal.reshape(5, 3))
        # Visualize finger positions.
        place_markers('finger{}', self._get_achieved_goal().reshape(5, 3))
        self.sim.forward()
diff --git a/gym-grasp/gym_grasp/envs/hand_env.py b/gym-grasp/gym_grasp/envs/hand_env.py
new file mode 100644
index 0000000000..1de155e7ed
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/hand_env.py
@@ -0,0 +1,49 @@
+import os
+import copy
+import numpy as np
+
+import gym
+from gym import error, spaces
+from gym.utils import seeding
+from gym_grasp.envs import robot_env
+
+
class HandEnv(robot_env.RobotEnv):
    """Robot environment for a hand that is driven by 21 actuator commands."""

    def __init__(self, model_path, n_substeps, initial_qpos, relative_control):
        """Remember the control mode and initialize the base environment with 21 actions."""
        self.relative_control = relative_control

        super(HandEnv, self).__init__(
            model_path=model_path,
            n_substeps=n_substeps,
            n_actions=21,
            initial_qpos=initial_qpos,
        )

    # RobotEnv methods
    # ----------------------------

    def _set_action(self, action):
        """Convert an action into actuator controls and write them to the simulation.

        Each action entry is scaled by half the width of the matching control range
        and added to a center value: the middle of the control range in absolute
        mode, or the current joint position in relative mode. The result is clipped
        to the control range.
        """
        assert action.shape == (21,)

        model, data = self.sim.model, self.sim.data
        ctrlrange = model.actuator_ctrlrange
        lower, upper = ctrlrange[:, 0], ctrlrange[:, 1]
        half_width = (upper - lower) / 2.

        if not self.relative_control:
            center = (upper + lower) / 2.
        else:
            center = np.zeros_like(action)
            # The joint driven by an actuator carries the actuator's name without 'A_'.
            for act_idx in range(data.ctrl.shape[0]):
                joint = model.actuator_names[act_idx].replace(':A_', ':')
                center[act_idx] = data.get_joint_qpos(joint)
            # For these four fingers the J1 actuator center also includes the J0 joint position.
            for finger in ['FF', 'MF', 'RF', 'LF']:
                act_idx = model.actuator_name2id('robot0:A_{}J1'.format(finger))
                center[act_idx] += data.get_joint_qpos('robot0:{}J0'.format(finger))

        data.ctrl[:] = center + action * half_width
        data.ctrl[:] = np.clip(data.ctrl, lower, upper)

    def _viewer_setup(self):
        """Point the viewer camera at the palm from a fixed distance and angle."""
        palm_id = self.sim.model.body_name2id('robot0:palm')
        palm_pos = self.sim.data.body_xpos[palm_id]
        camera = self.viewer.cam
        for axis_idx, coordinate in enumerate(palm_pos):
            camera.lookat[axis_idx] = coordinate
        camera.distance = 0.5
        camera.azimuth = 55.
        camera.elevation = -25.
diff --git a/gym-grasp/gym_grasp/envs/robot_env.py b/gym-grasp/gym_grasp/envs/robot_env.py
new file mode 100644
index 0000000000..6d0714026a
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/robot_env.py
@@ -0,0 +1,162 @@
+import os
+import copy
+import numpy as np
+
+import gym
+from gym import error, spaces
+from gym.utils import seeding
+
+try:
+ import mujoco_py
+except ImportError as e:
+ raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
+
+
class RobotEnv(gym.GoalEnv):
    """Base class for goal-based MuJoCo robot environments.

    Subclasses provide the task by implementing `_get_obs`, `_set_action`,
    `_is_success` and `_sample_goal`, and may override the optional hooks
    at the bottom of the class.
    """

    def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
        """Load the model, set up the simulation and define the action/observation spaces.

        Args:
            model_path (string): path of the model XML file; a path starting with '/'
                is used as is, any other path is looked up in the 'assets' directory
                next to this file
            initial_qpos: initial configuration handed to `_env_setup`
            n_actions (int): size of the action vector
            n_substeps (int): number of simulation substeps per call to `sim.step`

        Raises:
            IOError: if the model file does not exist.
        """
        fullpath = model_path
        if not model_path.startswith('/'):
            fullpath = os.path.join(os.path.dirname(__file__), 'assets', model_path)
        if not os.path.exists(fullpath):
            raise IOError('File {} does not exist'.format(fullpath))

        self.sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(fullpath), nsubsteps=n_substeps)
        self.viewer = None

        frames_per_second = int(np.round(1.0 / self.dt))
        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': frames_per_second,
        }

        self.seed()
        self._env_setup(initial_qpos=initial_qpos)
        # Snapshot that `_reset_sim` restores on every reset.
        self.initial_state = copy.deepcopy(self.sim.get_state())

        self.goal = self._sample_goal()
        sample_obs = self._get_obs()

        def unbounded_box(shape):
            return spaces.Box(-np.inf, np.inf, shape=shape, dtype='float32')

        self.action_space = spaces.Box(-1., 1., shape=(n_actions,), dtype='float32')
        # Both goal spaces take their shape from the achieved goal.
        self.observation_space = spaces.Dict(dict(
            desired_goal=unbounded_box(sample_obs['achieved_goal'].shape),
            achieved_goal=unbounded_box(sample_obs['achieved_goal'].shape),
            observation=unbounded_box(sample_obs['observation'].shape),
        ))

    @property
    def dt(self):
        """Simulated time covered by one environment step."""
        return self.sim.model.opt.timestep * self.sim.nsubsteps

    # Env methods
    # ----------------------------

    def seed(self, seed=None):
        """Create the environment's random generator and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply `action`, advance the simulation and return (obs, reward, done, info).

        `done` is always False; `info` carries the 'is_success' flag.
        """
        clipped = np.clip(action, self.action_space.low, self.action_space.high)
        self._set_action(clipped)
        self.sim.step()
        self._step_callback()
        obs = self._get_obs()

        info = {
            'is_success': self._is_success(obs['achieved_goal'], self.goal),
        }
        reward = self.compute_reward(obs['achieved_goal'], self.goal, info)
        done = False
        return obs, reward, done, info

    def reset(self):
        """Reset the simulation, sample a new goal and return the first observation."""
        # Initial conditions are randomized, so a reset can end up in a state with
        # numerical issues (e.g. due to penetration or gimbal lock) or in an invalid
        # initial condition (e.g. an object is within the hand). Keep retrying until
        # `_reset_sim` reports a valid initial configuration.
        while not self._reset_sim():
            pass
        self.goal = self._sample_goal().copy()
        return self._get_obs()

    def close(self):
        """Drop the reference to the viewer, if one was created."""
        if self.viewer is None:
            return
        # The viewer is only dereferenced here; viewer.finish() is intentionally not called.
        self.viewer = None

    def render(self, mode='human'):
        """Render a frame; returns the image array for 'rgb_array' and None for 'human'."""
        self._render_callback()
        if mode == 'human':
            self._get_viewer().render()
        elif mode == 'rgb_array':
            self._get_viewer().render()
            # window size used for old mujoco-py:
            width, height = 500, 500
            pixels = self._get_viewer().read_pixels(width, height, depth=False)
            # The image comes back upside-down, so flip it vertically.
            return pixels[::-1, :, :]

    def _get_viewer(self):
        """Return the viewer, creating and configuring it on first use."""
        if self.viewer is not None:
            return self.viewer
        self.viewer = mujoco_py.MjViewer(self.sim)
        self._viewer_setup()
        return self.viewer

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Restore the initial simulation state and report whether that succeeded.

        Overrides should return False when a reset fails (e.g. when a randomized
        state caused an error in the simulation); `reset` then calls this method
        again. This default implementation always succeeds.
        """
        self.sim.set_state(self.initial_state)
        self.sim.forward()
        return True

    def _get_obs(self):
        """Return the current observation. Must be implemented by subclasses."""
        raise NotImplementedError()

    def _set_action(self, action):
        """Apply the given action to the simulation. Must be implemented by subclasses."""
        raise NotImplementedError()

    def _is_success(self, achieved_goal, desired_goal):
        """Tell whether the achieved goal satisfies the desired goal. Must be implemented by subclasses."""
        raise NotImplementedError()

    def _sample_goal(self):
        """Sample and return a new goal. Must be implemented by subclasses."""
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Optional hook for the initial configuration of the environment, e.g. to set
        the initial state or to extract information from the simulation.
        """
        pass

    def _viewer_setup(self):
        """Optional hook for the initial configuration of the viewer, e.g. the camera position."""
        pass

    def _render_callback(self):
        """Optional hook that runs before rendering, e.g. for custom visualizations."""
        pass

    def _step_callback(self):
        """Optional hook that runs after stepping the simulation, e.g. to enforce
        additional constraints on the simulation state.
        """
        pass
diff --git a/gym-grasp/gym_grasp/envs/rotations.py b/gym-grasp/gym_grasp/envs/rotations.py
new file mode 100644
index 0000000000..4aafb64a08
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/rotations.py
@@ -0,0 +1,369 @@
+# Copyright (c) 2009-2017, Matthew Brett and Christoph Gohlke
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Many methods borrow heavily or entirely from transforms3d:
+# https://github.com/matthew-brett/transforms3d
+# They have mostly been modified to support batched operations.
+
+import numpy as np
+import itertools
+
+'''
+Rotations
+=========
+
+Note: these have caused many subtle bugs in the past.
+Be careful while updating these methods and while using them in clever ways.
+
+See MuJoCo documentation here: http://mujoco.org/book/modeling.html#COrientation
+
+Conventions
+-----------
+ - All functions accept batches as well as individual rotations
+ - All rotation conventions match respective MuJoCo defaults
+ - All angles are in radians
+ - Matrices follow LR convention
+ - Euler Angles are all relative with 'xyz' axes ordering
+ - See specific representation for more information
+
+Representations
+---------------
+
+Euler
+ There are many euler angle frames -- here we will strive to use the default
+ in MuJoCo, which is eulerseq='xyz'.
+ This frame is a relative rotating frame, about x, y, and z axes in order.
+ Relative rotating means that after we rotate about x, then we use the
+ new (rotated) y, and the same for z.
+
+Quaternions
+ These are defined in terms of rotation (angle) about a unit vector (x, y, z)
+ We use the following convention:
+ q0 = cos(angle / 2)
+ q1 = sin(angle / 2) * x
+ q2 = sin(angle / 2) * y
+ q3 = sin(angle / 2) * z
+ This is also sometimes called qw, qx, qy, qz.
+ Note that quaternions are ambiguous, because we can represent a rotation by
+ angle about vector and -angle about vector <-x, -y, -z>.
+ To choose between these, we pick "first nonzero positive", where we
+ make the first nonzero element of the quaternion positive.
+ This can result in mismatches if you're converting a quaternion that is not
+ "first nonzero positive" to a different representation and back.
+
+Axis Angle
+ (Not currently implemented)
+ These are very straightforward. Rotation is angle about a unit vector.
+
+XY Axes
+ (Not currently implemented)
+ We are given x axis and y axis, and z axis is cross product of x and y.
+
+Z Axis
+ This is NOT RECOMMENDED. Defines a unit vector for the Z axis,
+ but rotation about this axis is not well defined.
+ Instead pick a fixed reference direction for another axis (e.g. X)
+ and calculate the other (e.g. Y = Z cross-product X),
+ then use XY Axes rotation instead.
+
+SO3
+ (Not currently implemented)
+ While not supported by MuJoCo, this representation has a lot of nice features.
+ We expect to add support for these in the future.
+
+TODO / Missing
+--------------
+ - Rotation integration or derivatives (e.g. velocity conversions)
+ - More representations (SO3, etc)
+ - Random sampling (e.g. sample uniform random rotation)
+ - Performance benchmarks/measurements
+ - (Maybe) define everything as to/from matrices, for simplicity
+'''
+
+# For testing whether a number is close to zero
+_FLOAT_EPS = np.finfo(np.float64).eps
+_EPS4 = _FLOAT_EPS * 4.0
+
+
def euler2mat(euler):
    """Convert Euler angles to rotation matrices.

    Args:
        euler: array-like whose last axis has length 3; any leading axes
            are treated as batch dimensions.

    Returns:
        float64 array of shape ``euler.shape[:-1] + (3, 3)``.

    See the module docstring for the angle conventions.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    # The formulas below work on the negated angles, taken last-to-first.
    angle_i = -euler[..., 2]
    angle_j = -euler[..., 1]
    angle_k = -euler[..., 0]
    sin_i, cos_i = np.sin(angle_i), np.cos(angle_i)
    sin_j, cos_j = np.sin(angle_j), np.cos(angle_j)
    sin_k, cos_k = np.sin(angle_k), np.cos(angle_k)
    cc = cos_i * cos_k
    cs = cos_i * sin_k
    sc = sin_i * cos_k
    ss = sin_i * sin_k

    mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
    # First row
    mat[..., 0, 0] = cos_j * cos_i
    mat[..., 0, 1] = cos_j * sin_i
    mat[..., 0, 2] = -sin_j
    # Second row
    mat[..., 1, 0] = sin_j * cs - sc
    mat[..., 1, 1] = sin_j * ss + cc
    mat[..., 1, 2] = cos_j * sin_k
    # Third row
    mat[..., 2, 0] = sin_j * cc + ss
    mat[..., 2, 1] = sin_j * sc - cs
    mat[..., 2, 2] = cos_j * cos_k
    return mat
+
+
def euler2quat(euler):
    """Convert Euler angles to quaternions in (w, x, y, z) order.

    Args:
        euler: array-like whose last axis has length 3; any leading axes
            are treated as batch dimensions.

    Returns:
        float64 array of shape ``euler.shape[:-1] + (4,)``.

    See the module docstring for the angle conventions.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

    # Half angles; note that only the middle angle is negated.
    half_i = euler[..., 2] / 2
    half_j = -euler[..., 1] / 2
    half_k = euler[..., 0] / 2
    sin_i, cos_i = np.sin(half_i), np.cos(half_i)
    sin_j, cos_j = np.sin(half_j), np.cos(half_j)
    sin_k, cos_k = np.sin(half_k), np.cos(half_k)
    cc = cos_i * cos_k
    cs = cos_i * sin_k
    sc = sin_i * cos_k
    ss = sin_i * sin_k

    quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64)
    quat[..., 0] = cos_j * cc + sin_j * ss
    quat[..., 1] = cos_j * cs - sin_j * sc
    quat[..., 2] = -(cos_j * ss + sin_j * cc)
    quat[..., 3] = cos_j * sc - sin_j * cs
    return quat
+
+
def mat2euler(mat):
    """Convert rotation matrices to Euler angles.

    Args:
        mat: array-like whose last two axes are (3, 3); any leading axes
            are treated as batch dimensions.

    Returns:
        float64 array of shape ``mat.shape[:-1]`` (i.e. ``(..., 3)``).

    See the module docstring for the angle conventions.
    """
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    m00, m01, m02 = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
    m10, m11, m12 = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
    m22 = mat[..., 2, 2]

    cy = np.sqrt(m22 * m22 + m12 * m12)
    # When cy is (numerically) zero the general formulas are not usable;
    # the fallback fixes the first angle to 0.
    regular = cy > _EPS4

    euler = np.empty(mat.shape[:-1], dtype=np.float64)
    euler[..., 0] = np.where(regular, -np.arctan2(m12, m22), 0.0)
    # The middle angle uses the same expression in both cases.
    euler[..., 1] = -np.arctan2(-m02, cy)
    euler[..., 2] = np.where(regular,
                             -np.arctan2(m01, m00),
                             -np.arctan2(-m10, m11))
    return euler
+
+
def mat2quat(mat):
    """Convert rotation matrices to quaternions in (w, x, y, z) order.

    For every input matrix a symmetric 4x4 matrix ``K`` is built and the
    eigenvector belonging to its largest eigenvalue is taken as the
    quaternion. The whole batch is decomposed with a single stacked
    ``np.linalg.eigh`` call instead of a Python-level loop.

    Args:
        mat: array-like whose last two axes are (3, 3); any leading axes
            (including empty ones) are treated as batch dimensions.

    Returns:
        float64 array of shape ``mat.shape[:-2] + (4,)``. Quaternions with
        a negative ``w`` are negated (``q`` and ``-q`` are the same
        rotation).
    """
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
    Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
    Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
    # Fill only the lower half of the symmetric matrix; eigh reads the
    # lower triangle by default (UPLO='L').
    K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
    K[..., 0, 0] = Qxx - Qyy - Qzz
    K[..., 1, 0] = Qyx + Qxy
    K[..., 1, 1] = Qyy - Qxx - Qzz
    K[..., 2, 0] = Qzx + Qxz
    K[..., 2, 1] = Qzy + Qyz
    K[..., 2, 2] = Qzz - Qxx - Qyy
    K[..., 3, 0] = Qyz - Qzy
    K[..., 3, 1] = Qzx - Qxz
    K[..., 3, 2] = Qxy - Qyx
    K[..., 3, 3] = Qxx + Qyy + Qzz
    K /= 3.0

    # Hermitian eigendecomposition of every K in the batch at once.
    vals, vecs = np.linalg.eigh(K)
    # Column of the largest eigenvalue for each matrix.
    largest = np.argmax(vals, axis=-1)
    q_xyzw = np.take_along_axis(vecs, largest[..., np.newaxis, np.newaxis],
                                axis=-1)[..., 0]
    # Reorder the eigenvector components to w, x, y, z.
    q = q_xyzw[..., [3, 0, 1, 2]]
    # Prefer the quaternion with positive w.
    return np.where(q[..., :1] < 0, -q, q)
+
+
def quat2euler(quat):
    """Convert quaternions to Euler angles via the rotation-matrix form."""
    rotation = quat2mat(quat)
    return mat2euler(rotation)
+
+
def subtract_euler(e1, e2):
    """Return the Euler angles of ``quat(e1) * conjugate(quat(e2))``.

    Both inputs must be arrays of identical shape with a last axis of
    length 3.
    """
    assert e1.shape == e2.shape
    assert e1.shape[-1] == 3
    first = euler2quat(e1)
    second = euler2quat(e2)
    difference = quat_mul(first, quat_conjugate(second))
    return quat2euler(difference)
+
+
def quat2mat(quat):
    """Convert quaternions to rotation matrices.

    Args:
        quat: array-like whose last axis has length 4, ordered
            (w, x, y, z); any leading axes are treated as batch
            dimensions. The quaternions need not be normalized: the
            result is scaled by the squared norm.

    Returns:
        float64 array of shape ``quat.shape[:-1] + (3, 3)``. Quaternions
        whose squared norm is not above machine epsilon map to the
        identity matrix.
    """
    quat = np.asarray(quat, dtype=np.float64)
    assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)

    w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
    Nq = np.sum(quat * quat, axis=-1)
    # A zero-norm quaternion divides by zero here; those entries are
    # replaced by the identity below, so silence the spurious warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        s = 2.0 / Nq
        X, Y, Z = x * s, y * s, z * s
        wX, wY, wZ = w * X, w * Y, w * Z
        xX, xY, xZ = x * X, x * Y, x * Z
        yY, yZ, zZ = y * Y, y * Z, z * Z

        mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
        mat[..., 0, 0] = 1.0 - (yY + zZ)
        mat[..., 0, 1] = xY - wZ
        mat[..., 0, 2] = xZ + wY
        mat[..., 1, 0] = xY + wZ
        mat[..., 1, 1] = 1.0 - (xX + zZ)
        mat[..., 1, 2] = yZ - wX
        mat[..., 2, 0] = xZ - wY
        mat[..., 2, 1] = yZ + wX
        mat[..., 2, 2] = 1.0 - (xX + yY)
    return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))
+
def quat_conjugate(q):
    """Return the conjugate of ``q``: same w, negated x, y, z.

    ``q`` must be an array with (w, x, y, z) on the last axis; it is not
    modified.
    """
    conjugate = -q
    # Negating everything flipped w as well; flip it back.
    conjugate[..., 0] = -conjugate[..., 0]
    return conjugate
+
def quat_mul(q0, q1):
    """Multiply quaternions: return the Hamilton product ``q0 * q1``.

    Args:
        q0, q1: arrays of identical shape with (w, x, y, z) on the last
            axis. Any number of leading batch axes is supported.

    Returns:
        Array with the same shape as ``q0``.
    """
    assert q0.shape == q1.shape
    assert q0.shape[-1] == 4
    assert q1.shape[-1] == 4

    w0, x0, y0, z0 = q0[..., 0], q0[..., 1], q0[..., 2], q0[..., 3]
    w1, x1, y1, z1 = q1[..., 0], q1[..., 1], q1[..., 2], q1[..., 3]

    w = w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1
    x = w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1
    y = w0 * y1 + y0 * w1 + z0 * x1 - x0 * z1
    z = w0 * z1 + z0 * w1 + x0 * y1 - y0 * x1
    # Stack along a new last axis so the component axis ends up last for
    # inputs of any rank, not only 1-D and 2-D ones.
    q = np.stack([w, x, y, z], axis=-1)
    assert q.shape == q0.shape
    return q
+
def quat_rot_vec(q, v0):
    """Rotate the 3-vector ``v0`` by the quaternion ``q``.

    The vector is embedded as a quaternion with zero scalar part,
    sandwiched as ``q * v * conjugate(q)``, and the vector part of the
    result is returned.
    """
    pure = np.array([0, v0[0], v0[1], v0[2]])
    rotated = quat_mul(q, quat_mul(pure, quat_conjugate(q)))
    return rotated[1:]
+
def quat_identity():
    """Return a new array holding the identity quaternion (w, x, y, z) = (1, 0, 0, 0)."""
    identity = (1, 0, 0, 0)
    return np.array(identity)
+
def quat2axisangle(quat):
    """Convert a single quaternion to an (axis, angle) pair.

    Args:
        quat: 1-D array (w, x, y, z).

    Returns:
        Tuple ``(axis, theta)``. When the norm of the vector part is at
        most 1e-4 the rotation is treated as zero and ``([0, 0, 1], 0)``
        is returned. Otherwise ``axis`` is the vector part divided by its
        norm and ``theta`` is ``2 * arcsin(norm)``, negated when ``w`` is
        negative.
    """
    theta = 0
    axis = np.array([0, 0, 1])
    sin_theta = np.linalg.norm(quat[1:])

    if sin_theta > 0.0001:
        # Rounding can push the norm marginally above 1, where arcsin
        # would return NaN; cap the argument at 1.
        theta = 2 * np.arcsin(np.minimum(sin_theta, 1.0))
        theta *= 1 if quat[0] >= 0 else -1
        axis = quat[1:] / sin_theta

    return axis, theta
+
def euler2point_euler(euler):
    """Encode Euler angles as their sines followed by their cosines.

    Args:
        euler: array of shape (3,) or (N, 3).

    Returns:
        Array of shape (N, 6): ``[sin(e0), sin(e1), sin(e2), cos(e0),
        cos(e1), cos(e2)]`` per row. A 1-D input yields a single row.
    """
    angles = euler.copy()
    if angles.ndim < 2:
        angles = angles[np.newaxis]
    assert(angles.shape[1] == 3)
    return np.concatenate((np.sin(angles), np.cos(angles)), axis=-1)
+
def point_euler2euler(euler):
    """Decode the (sin, cos) encoding of Euler angles back to angles.

    Args:
        euler: array of shape (6,) or (N, 6) holding three sines followed
            by three cosines.

    Returns:
        Array of shape (N, 3): ``arctan(sin / cos)``, with pi added
        wherever the cosine is negative.
    """
    encoded = euler.copy()
    if encoded.ndim < 2:
        encoded = encoded[np.newaxis]
    assert(encoded.shape[1] == 6)
    sines = encoded[..., :3]
    cosines = encoded[..., 3:]
    angle = np.arctan(sines / cosines)
    # arctan only covers (-pi/2, pi/2); shift entries with negative cosine.
    angle[cosines < 0] += np.pi
    return angle
+
def quat2point_quat(quat):
    """Encode quaternions as ``[sin(angle), cos(angle), x, y, z]``.

    Args:
        quat: array of shape (4,) or (N, 4), ordered (qw, qx, qy, qz).

    Returns:
        Array of shape (N, 5) where ``angle = 2 * arccos(qw)``. The
        vector part is divided by ``sin(angle / 2)`` except in rows where
        the absolute value of that sine is below 1e-5, which keep their
        raw (qx, qy, qz).
    """
    # Should be in qw, qx, qy, qz
    _quat = quat.copy()
    if _quat.ndim < 2:
        _quat = np.expand_dims(_quat, 0)
    assert(_quat.shape[1] == 4)
    angle = np.arccos(_quat[:, [0]]) * 2
    half_sin = np.sin(angle / 2)
    # Rows whose half-angle sine is large enough to divide by safely.
    usable = np.squeeze(np.abs(half_sin)) >= 1e-5
    xyz = _quat[:, 1:]
    xyz[usable] = (xyz / half_sin)[usable]
    return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)
+
def point_quat2quat(quat):
    """Decode ``[sin(angle), cos(angle), x, y, z]`` rows into quaternions.

    The angle is recovered with ``arctan2`` and wrapped to [0, 2*pi), so
    every quadrant is preserved and a zero cosine does not cause a
    division by zero.

    Args:
        quat: array of shape (5,) or (N, 5).

    Returns:
        Array of shape (N, 4), ordered (qw, qx, qy, qz), with
        ``qw = cos(angle / 2)``. The vector part is multiplied by
        ``sin(angle / 2)`` except in rows where the absolute value of
        that sine is below 1e-5, which keep their (x, y, z) unchanged.
    """
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert(_quat.shape[1] == 5)
    # arctan(sin / cos) would fold the angle into (-pi/2, pi/2) and lose
    # the quadrant; arctan2 keeps it.
    angle = np.mod(np.arctan2(_quat[:, [0]], _quat[:, [1]]), 2 * np.pi)
    half_sin = np.sin(angle / 2)
    qw = np.cos(angle / 2)

    qxyz = _quat[:, 2:]
    scaled_rows = np.abs(half_sin[:, 0]) >= 1e-5
    qxyz[scaled_rows] = (qxyz * half_sin)[scaled_rows]
    return np.concatenate([qw, qxyz], axis=-1)
+
def normalize_angles(angles):
    '''Wrap angles into the [-pi, pi] range and return them as a new array.

    An empty input is returned as an (empty) copy.
    '''
    wrapped = angles.copy()
    if wrapped.size > 0:
        full_turn = 2 * np.pi
        wrapped = (wrapped + np.pi) % full_turn - np.pi
        # Sanity check, with a small tolerance for rounding.
        assert -np.pi-1e-6 <= wrapped.min() and wrapped.max() <= np.pi+1e-6
    return wrapped
+
def round_to_straight_angles(angles):
    '''Round each angle to the nearest multiple of 90 degrees, then normalize.'''
    quarter_turn = np.pi / 2
    snapped = np.round(angles / quarter_turn) * quarter_turn
    return normalize_angles(snapped)
+
def get_parallel_rotations():
    """Return the 24 distinct Euler-angle triples built from multiples of 90 degrees.

    Every combination of (0, pi/2, -pi/2, pi) on the three axes is pushed
    through ``euler2mat``/``mat2euler`` to obtain a canonical form, and
    duplicates are dropped.
    """
    quarter_turn = np.pi / 2
    right_angles = [0, np.pi/2, -np.pi/2, np.pi]
    distinct = []
    for euler in itertools.product(right_angles, repeat=3):
        # Canonical form, expressed in whole quarter turns.
        steps = np.round(mat2euler(euler2mat(euler)) / quarter_turn)
        # -pi and +pi are the same angle on the first and last axes.
        for axis in (0, 2):
            if steps[axis] == -2:
                steps[axis] = 2
        candidate = steps * quarter_turn
        if all((candidate != seen).any() for seen in distinct):
            distinct.append(candidate)
    assert len(distinct) == 24
    return distinct
diff --git a/gym-grasp/gym_grasp/envs/utils.py b/gym-grasp/gym_grasp/envs/utils.py
new file mode 100644
index 0000000000..a73e5f6052
--- /dev/null
+++ b/gym-grasp/gym_grasp/envs/utils.py
@@ -0,0 +1,96 @@
+import numpy as np
+
+from gym import error
+try:
+ import mujoco_py
+except ImportError as e:
+ raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
+
+
def robot_get_obs(sim):
    """Return the positions and velocities of all robot joints.

    Robot joints are the entries of ``sim.model.joint_names`` whose name
    starts with ``'robot'``.

    Returns:
        Tuple ``(qpos, qvel)`` of arrays. Two empty arrays are returned
        when ``sim.data.qpos`` is None or there are no joint names.
    """
    if sim.data.qpos is None or not sim.model.joint_names:
        return np.zeros(0), np.zeros(0)
    robot_joints = [joint for joint in sim.model.joint_names
                    if joint.startswith('robot')]
    positions = np.array([sim.data.get_joint_qpos(joint) for joint in robot_joints])
    velocities = np.array([sim.data.get_joint_qvel(joint) for joint in robot_joints])
    return positions, velocities
+
+
def ctrl_set_action(sim, action):
    """Write ``action`` into the simulation's ``ctrl`` vector.

    Actuators whose ``actuator_biastype`` is 0 receive the action value
    directly (torque actuators); all others receive the current qpos of
    their joint plus the action (position actuators, i.e. a relative
    target). When the model has mocap bodies, the first ``nmocap * 7``
    action entries are skipped.
    """
    model, data = sim.model, sim.data
    if model.nmocap > 0:
        action = np.split(action, (model.nmocap * 7, ))[1]
    if data.ctrl is None:
        return
    for i in range(action.shape[0]):
        if model.actuator_biastype[i] == 0:
            data.ctrl[i] = action[i]
            continue
        qpos_index = model.jnt_qposadr[model.actuator_trnid[i, 0]]
        data.ctrl[i] = data.qpos[qpos_index] + action[i]
+
+
def mocap_set_action(sim, action):
    """Move the mocap bodies by the deltas stored in ``action``.

    The action controls the robot through mocap bodies: bodies on the
    robot (for example the gripper wrist) are welded to mocaps. The first
    ``nmocap * 7`` action entries are read as one (3 position + 4
    quaternion) delta per mocap, in world coordinates. The mocaps are
    first snapped onto the bodies they are welded to, then offset by the
    deltas; the MuJoCo equality-constraint solver then tries to center
    the welded body on the mocap. Nothing happens when the model has no
    mocap bodies.
    """
    n_mocap = sim.model.nmocap
    if n_mocap <= 0:
        return
    deltas = np.split(action, (n_mocap * 7, ))[0].reshape(n_mocap, 7)
    pos_delta, quat_delta = deltas[:, :3], deltas[:, 3:]

    reset_mocap2body_xpos(sim)
    sim.data.mocap_pos[:] = sim.data.mocap_pos + pos_delta
    sim.data.mocap_quat[:] = sim.data.mocap_quat + quat_delta
+
+
def reset_mocap_welds(sim):
    """Reset the mocap welds used for actuation, then run a forward pass.

    Every weld equality constraint gets its ``eq_data`` row overwritten
    with ``[0, 0, 0, 1, 0, 0, 0]``. ``sim.forward()`` is called
    regardless of whether any weld was reset.
    """
    model = sim.model
    if model.nmocap > 0 and model.eq_data is not None:
        weld_reset = np.array(
            [0., 0., 0., 1., 0., 0., 0.])
        for row in range(model.eq_data.shape[0]):
            if model.eq_type[row] == mujoco_py.const.EQ_WELD:
                model.eq_data[row, :] = weld_reset
    sim.forward()
+
+
def reset_mocap2body_xpos(sim):
    """Snap every mocap body onto the body it is welded to.

    For each weld equality constraint, the mocap's position and
    orientation are overwritten with those of the welded body. Returns
    early when the model has no equality-constraint arrays.
    """
    model, data = sim.model, sim.data
    if (model.eq_type is None or
            model.eq_obj1id is None or
            model.eq_obj2id is None):
        return

    constraints = zip(model.eq_type, model.eq_obj1id, model.eq_obj2id)
    for eq_type, first_id, second_id in constraints:
        if eq_type != mujoco_py.const.EQ_WELD:
            continue

        # Assume the first object is the mocap and the second the welded body.
        mocap_id = model.body_mocapid[first_id]
        welded_body = second_id
        if mocap_id == -1:
            # It is the other way round.
            mocap_id = model.body_mocapid[second_id]
            welded_body = first_id

        assert (mocap_id != -1)
        data.mocap_pos[mocap_id][:] = data.body_xpos[welded_body]
        data.mocap_quat[mocap_id][:] = data.body_xquat[welded_body]
diff --git a/gym-grasp/setup.py b/gym-grasp/setup.py
new file mode 100644
index 0000000000..16314b2f17
--- /dev/null
+++ b/gym-grasp/setup.py
@@ -0,0 +1,10 @@
+from setuptools import setup
+
# Package metadata for gym_grasp; the hand model XML files are shipped as package data.
setup(
    name='gym_grasp',
    version='0.0.1',
    install_requires=[
        'gym>=0.2.3',
        'mujoco_py>=1.50',
    ],
    package_data={
        'gym_grasp': ['envs/assets/hand/*.xml'],
    },
)
diff --git a/mujoco-py b/mujoco-py
new file mode 160000
index 0000000000..54367d181b
--- /dev/null
+++ b/mujoco-py
@@ -0,0 +1 @@
+Subproject commit 54367d181b4335b42a0f094274a07b21352af9f2
diff --git a/projection/Dockerfile b/projection/Dockerfile
new file mode 100644
index 0000000000..ec19fa6e41
--- /dev/null
+++ b/projection/Dockerfile
@@ -0,0 +1,24 @@
FROM chainer/chainer:v4.5.0-python3
# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="Yoshimura Naoya"

# Install Emacs. Update and install run in one layer so a cached
# package index is never reused for a later install.
RUN apt-get update \
    && apt-get install -y emacs24-nox


# Install Jupyter and generate its default configuration
RUN pip3 install jupyter \
    && jupyter notebook --generate-config
RUN echo 'alias python=python3' >> ~/.bashrc \
    && echo 'alias pip=pip3' >> ~/.bashrc
RUN pip3 install --upgrade pip

# Install Python modules
COPY requirements.txt /root
RUN pip install -r /root/requirements.txt



# Finish
RUN mkdir /root/work
WORKDIR /root/work
diff --git a/projection/MAKE_CONTAINER.sh b/projection/MAKE_CONTAINER.sh
new file mode 100644
index 0000000000..14a99f8ce0
--- /dev/null
+++ b/projection/MAKE_CONTAINER.sh
@@ -0,0 +1,18 @@
#!/bin/sh
# Create and start the project container.
# Usage: MAKE_CONTAINER.sh MODE
#   MODE 0: GPU 0, Jupyter on host port 7088, port 6006 on host port 7086.
# Any other MODE (or none) does nothing.
mode="${1:-}"

# Quoted so that a missing argument compares as an empty string instead
# of making the test command fail.
if [ "${mode}" = 0 ];
then
    DIR_CODE="/home/yoshimura/code708/synergy"
    DIR_DATA="/home/yoshimura/code708/dataStore"
    NAME="synergy"
    IMAGE="yoshimura/synergy:v4.5.0"
    PORT_NOTE=7088
    PORT_TFB=7086

    docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0 \
	   -v "${DIR_CODE}":/root/work \
	   -v "${DIR_DATA}":/root/dataStore \
	   --name "${NAME}" \
	   -p "${PORT_NOTE}":8888 -p "${PORT_TFB}":6006 \
	   -it "${IMAGE}" jupyter notebook --allow-root --ip 0.0.0.0
fi
diff --git a/projection/README.md b/projection/README.md
new file mode 100644
index 0000000000..da5f8fcc85
--- /dev/null
+++ b/projection/README.md
@@ -0,0 +1,58 @@
+# Projection Network with Chainer
+中枢神経系 (action) からPremotor Neuronへの投射を行うネットワークの学習.
+
+
+## Installation
+### Requirements
++ chainer
++ cupy
+
+### Docker Setup
+First, build the Docker image with this command.
+
+```
+$ docker build -t synergy/chainer:v4.5.0 .
+```
+
+And then make container,
+
+```
+docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=${GPU} \
+ -v ${DIR_code}:/root/work \
+ -v ${DIR_DATA}:/root/dataStore \
+ --name ${NAME} \
+ -p ${PORT_NOTE} -p ${PORT_TFB} \
+ -it ${IMAGE} jupyter notebook --allow-root --ip 0.0.0.0
+```
+
+## Data Preparation
+### Resampling
+Use `utils/make_inputs.py`. After this, split the data into `train/val/test` yourself.
+
+
+```
+python3 make_inputs.py \
+ --path-in /root/dataStore/grasp_v1/episodes \
+ --path-out /root/dataStore/grasp_v1/Inputs
+```
+
+## Training
+note: Set correct paths!
+
+
+```
+python3 run.py TRAIN \
+ --path-data-train /root/dataStore/grasp_v1/Inputs/train \
+ --path-data-val /root/dataStore/grasp_v1/Inputs/val \
+ --path-model /root/dataStore/grasp_v1/Log/ChainerDenseNet.model \
+ --path-log /root/dataStore/grasp_v1/Log/ \
+ --gpu 0 \
+ --batch-size 64 \
+ --epoch 10
+
+```
+
+
+
+## Prediction (Generation)
+TBA
diff --git a/projection/dataset/default.py b/projection/dataset/default.py
new file mode 100644
index 0000000000..c38028e2c6
--- /dev/null
+++ b/projection/dataset/default.py
@@ -0,0 +1,50 @@
+import os
+import numpy as np
+import h5py
+
+
+from logging import getLogger, basicConfig, DEBUG
+logger = getLogger(__name__)
+
+# Chainer
+import chainer
+
+# ---------------------------------------------------------
class DefaultDataset(chainer.dataset.DatasetMixin):
    """Dataset that concatenates (X, Y) samples read from HDF5 files.

    Each file provides an ``"fc"`` dataset (inputs) and an
    ``"action/resampled"`` dataset (targets); both are flattened to 2-D
    with their last axis kept, and the rows of all files are stacked.
    """

    def __init__(self, file_list):
        """
        Args.
        -----
        - file_list :list of input files (+.h5)

        Paths that do not exist are skipped with a warning.

        Raises.
        -------
        - ValueError : if no file could be loaded at all.
        """
        X, Y = [], []
        for path in file_list:
            if not os.path.exists(path):
                logger.warning("File does not exsists! [path={}]".format(path))
                continue
            X_tmp, Y_tmp = self.load_file(path)
            X.append(X_tmp)
            Y.append(Y_tmp)
        if not X:
            # np.concatenate would fail with an unrelated-looking message.
            raise ValueError(
                "No input file could be loaded: file_list is empty or "
                "none of its paths exist.")
        self.X = np.concatenate(X, axis=0)
        self.Y = np.concatenate(Y, axis=0)
        logger.info("Success: X={}, Y={}".format(self.X.shape, self.Y.shape))

    def load_file(self, path):
        """Read one HDF5 file and return its samples as 2-D arrays (X, Y)."""
        with h5py.File(path, 'r') as f:
            X = np.array(f["fc"],)
            Y = np.array(f['action/resampled'],)

        # Collapse all leading axes into one sample axis.
        xshape, yshape = X.shape, Y.shape
        X, Y = X.reshape((-1, xshape[-1])), Y.reshape((-1, yshape[-1]))
        return X, Y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def get_example(self, i):
        """Return the i-th (input, target) pair."""
        return self.X[i], self.Y[i]
diff --git a/projection/models/dense.py b/projection/models/dense.py
new file mode 100644
index 0000000000..daac5806e7
--- /dev/null
+++ b/projection/models/dense.py
@@ -0,0 +1,52 @@
+import chainer
+import chainer.links as L
+import chainer.functions as F
+
+import numpy as np
+import cupy as cp
+
+
+
class DenseNet(chainer.Chain):
    """Three-layer fully connected network with a tanh output.

    Reference.
    ----------
    - "Deep Convolutional and LSTM Recurrent Neural Networks for Multimodal Wearable Activity Recognition"
      [www.mdpi.com/1424-8220/16/1/115/pdf]
      - Baseline CNN

    Args.
    -----
    - n_in  : int, Input dim  (=X.shape[-1])
    - n_out : int, Output dim (=Y.shape[-1])
    """

    def __init__(self, n_in=None, n_out=None):
        super(DenseNet, self).__init__()
        with self.init_scope():
            # Fully connected layers: n_in -> 32 -> 32 -> n_out
            self.fc1 = L.Linear(n_in, 32)
            self.fc2 = L.Linear(32, 32)
            self.fc3 = L.Linear(32, n_out)

    def __call__(self, x):
        """Forward pass with dropout after each hidden ReLU layer."""
        hidden = F.dropout(F.relu(self.fc1(x)))
        hidden = F.dropout(F.relu(self.fc2(hidden)))
        return F.tanh(self.fc3(hidden))

    def get_inter_layer(self, x):
        """Forward pass without dropout that also returns every layer output.

        Returns the final output and a dict with the activations of the
        three layers under the keys "h1", "h2" and "h3".
        """
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        h3 = F.tanh(self.fc3(h2))
        return h3, {"h1": h1, "h2": h2, "h3": h3}
+
diff --git a/projection/notebook/01_Resampling_of_Actions.ipynb b/projection/notebook/01_Resampling_of_Actions.ipynb
new file mode 100644
index 0000000000..92ccfb1b66
--- /dev/null
+++ b/projection/notebook/01_Resampling_of_Actions.ipynb
@@ -0,0 +1,873 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 01: Critic Networkの出力の量子化"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 概要\n",
+ "+ Critic Networkはロボットハンドの各時間ごとの角度を出力する.\n",
+ "+ そのまま学習しても投射にならないため, 角度を曲げる, 伸ばす, そのままに変換する [-1,0,1,].\n",
+ "\n",
+ "### ToDo\n",
+ "+ シンプルに曲げる/曲げない/そのままでOKなのか?\n",
+ "+ 値の変化幅を決めるために, 各時刻毎の値の差分の分布を確認する."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1-1: 差分の分布の確認"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(10, 2, 100, 21)\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import h5py\n",
+ "\n",
+ "filename = os.path.join(\"/root/dataStore\", \"grasp_v1\", \"episodes\", \"epoch0.h5\")\n",
+ "with h5py.File(filename, 'r') as f:\n",
+ " A = np.array(f[\"action\"],)\n",
+ " \n",
+ "print(A.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(1980, 21)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "sns.set(\"notebook\", \"whitegrid\", font_scale=1.5)\n",
+ "\n",
+ "X = A[:,:,1:,:] - A[:,:,:-1,:]\n",
+ "X = X.reshape((-1,X.shape[-1],))\n",
+ "print(X.shape)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "