We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c746af2 commit 436071f · Copy full SHA for 436071f
1 file changed
agentlightning/verl/trainer.py
@@ -417,10 +417,14 @@ def _train_step(self, batch_dict: dict) -> dict:
417
print(batch.batch.keys())
418
inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
419
outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
420
+ sample_gts = [
421
+ item.non_tensor_batch.get("reward_model", {}).get("ground_truth", None) for item in batch
422
+ ]
423
scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
424
self._dump_generations(
425
inputs=inputs,
426
outputs=outputs,
427
+ gts=sample_gts,
428
scores=scores,
429
reward_extra_infos_dict=reward_extra_infos_dict,
430
dump_path=rollout_data_dir,
0 commit comments