diff --git a/MAKING_A_COGAME.md b/MAKING_A_COGAME.md index 22589c2db..475e19798 100644 --- a/MAKING_A_COGAME.md +++ b/MAKING_A_COGAME.md @@ -187,7 +187,7 @@ HardOreMineMission = OreMineMission( ```bash cogames tutorial train \ --mission my_module.OreMineMission \ - --timesteps 2000000 + --steps 2000000 ``` CoGames uses [PufferLib](https://github.com/PufferAI/PufferLib) for training — PPO with parallel vectorized environments. The trained checkpoint loads back for play or eval: diff --git a/pyproject.toml b/pyproject.toml index f9b4383db..c9ed337d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" requires-python = ">=3.12,<3.13" classifiers = ["Programming Language :: Python :: 3", "Programming Language :: Python :: 3.12"] dependencies = [ - "mettagrid==0.26.12", + "mettagrid==0.26.13", "softmax-cli==0.26.5", "packaging>=24.0.0", "pydantic>=2.11.5", @@ -75,8 +75,8 @@ testpaths = ["tests"] source = ["cogames"] [tool.uv.sources] -mettagrid = { workspace = true } -softmax-cli = { workspace = true } +mettagrid = {git = "https://github.com/Metta-AI/mettagrid.git"} +softmax-cli = {git = "https://github.com/Metta-AI/softmax-cli.git"} diplomacog = { git = "https://github.com/Metta-AI/cogame-diplomacog.git" } hungercog = { git = "https://github.com/Metta-AI/cogame-hungercog.git" } overcogged = { git = "https://github.com/Metta-AI/cogame-overcogged.git" } diff --git a/src/cogames/docs/AMONGTHEM_POLICY.md b/src/cogames/docs/AMONGTHEM_POLICY.md index b94f3dc74..62808e537 100644 --- a/src/cogames/docs/AMONGTHEM_POLICY.md +++ b/src/cogames/docs/AMONGTHEM_POLICY.md @@ -17,13 +17,14 @@ that will run in the tournament worker, uploading it, and checking whether it sc cogames tutorial make-policy --amongthem -o amongthem_policy.py ``` -Edit `AmongThemPolicy._choose_actions()` in the generated file. The template receives raw BitWorld observations and must -write integer action indices from the BitWorld trainable action set into `raw_actions`. 
+Edit `AmongThemPolicy._choose_actions()` in the generated file. The template receives raw BitWorld observations and +must return an `np.ndarray` of integer action indices from the BitWorld trainable action set; the parent +`step_batch()` writes those into `raw_actions`. ## 2. Log in and pick a season ```bash -cogames login +cogames auth login cogames season list cogames season show ``` diff --git a/tutorials/02_TRAIN.ipynb b/tutorials/02_TRAIN.ipynb index 58f9d4c86..6d958cb6b 100644 --- a/tutorials/02_TRAIN.ipynb +++ b/tutorials/02_TRAIN.ipynb @@ -78,7 +78,7 @@ "Training on mission: arena\n", "...progress logs...\n", "Training complete. Checkpoints saved to: ./train_dir\n", - "Checkpoint saved to: ./train_dir/<run_id>/model_000001.pt\n", + "Final checkpoint: ./train_dir/<run_id>/model_000001.pt\n", "```\n", "\n", "Replace `<run_id>` with your actual run ID from your training output.\n", diff --git a/tutorials/02_TRAIN.md b/tutorials/02_TRAIN.md index af67fe118..77eab6476 100644 --- a/tutorials/02_TRAIN.md +++ b/tutorials/02_TRAIN.md @@ -40,7 +40,7 @@ Expected terminal output (example): Training on mission: arena ...progress logs... Training complete. Checkpoints saved to: ./train_dir -Checkpoint saved to: ./train_dir/<run_id>/model_000001.pt +Final checkpoint: ./train_dir/<run_id>/model_000001.pt ``` Replace `<run_id>` with your actual run ID from your training output. diff --git a/tutorials/02_TRAIN.py b/tutorials/02_TRAIN.py index 1cb0ea89b..e4d3bd409 100644 --- a/tutorials/02_TRAIN.py +++ b/tutorials/02_TRAIN.py @@ -61,7 +61,7 @@ # Training on mission: arena # ...progress logs... # Training complete. Checkpoints saved to: ./train_dir -# Checkpoint saved to: ./train_dir/<run_id>/model_000001.pt +# Final checkpoint: ./train_dir/<run_id>/model_000001.pt # ``` # # Replace `<run_id>` with your actual run ID from your training output.