TSP Environment Rendering

Here we demonstrate an additional use of graphenv: visualizing the solutions produced in an environment.

[1]:
import subprocess
from copy import copy
from itertools import chain
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy as np
from graphenv.examples.tsp.graph_utils import make_complete_planar_graph
from graphenv.examples.tsp.tsp_state import TSPState
from graphenv.graph_env import GraphEnv

%matplotlib inline
[2]:
# Size parameter for the problem instance; it is passed both to the graph
# builder and to the environment as the maximum number of children.
N = 50

# TSPState is handed a zero-argument callable that builds the graph
# (seed fixed at 1); GraphEnv is configured from a plain dict.
state = TSPState(lambda: make_complete_planar_graph(N, seed=1))
env = GraphEnv({"state": state, "max_num_children": N})
[3]:
%%capture

# Reset the environment and initialize the observation, reward, and done fields
obs, info = env.reset()
greedy_reward = 0
done = False

with TemporaryDirectory(dir=".") as tmpdir:
    while not done:

        # Select the action with the minimum distance from the parent node
        action = np.argmin([x["parent_dist"] for x in obs[1:]])

        # Get the observation for the next set of candidate nodes,
        # incremental reward, and done flags
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Append the step's reward to the running total
        greedy_reward += reward

        # Render the intermediate environment and save as an image
        fig, ax = env.render()
        ax.set_title(f"distance = {greedy_reward:.3f}")
        fig.savefig(Path(tmpdir, f"img{len(env.state.tour):01d}.jpg"), dpi=100)

    # Convert the list of frames to a video
    subprocess.run(
        "ffmpeg -i img%01d.jpg -vcodec libx264 -acodec libfaac -y ../movie.mp4",
        shell=True,
        check=True,
        cwd=tmpdir,
        capture_output=True,
    )

    subprocess.run(
        "ffmpeg -i img%01d.jpg -vcodec libvpx -acodec libvorbis -y ../movie.webm",
        shell=True,
        check=True,
        cwd=tmpdir,
        stdout=None,
        capture_output=True,
    )