diff --git a/sample_convo.jsonl b/sample_convo.jsonl new file mode 100644 index 0000000..5d26282 --- /dev/null +++ b/sample_convo.jsonl @@ -0,0 +1,7 @@ +{"role":"user","text":"Hey, can you help me refactor a function?"} +{"role":"user","text":"It's a bit slow and hard to read."} +{"role":"agent","text":"Sure—paste the function, and I'll propose a clearer version."} +{"role":"user","text":"def f(a,b):\n c=0\n for i in range(len(a)):\n c+=a[i]*b[i]\n return c"} +{"role":"agent","text":"I recommend naming and iterating directly over pairs."} +{"role":"agent","text":"Also add type hints and guard clauses."} +{"role":"user","text":"Great—can you show me the final function?"} diff --git a/sample_convo.weave.txt b/sample_convo.weave.txt new file mode 100644 index 0000000..9e728dc --- /dev/null +++ b/sample_convo.weave.txt @@ -0,0 +1,73 @@ +#startblock type: observation +#index 82 +#timestamp 1759961326.107705 +#time_remaining 20000.0 seconds +#block_size I have 768 tokens (full) to write with +#title WeaveDiscordClient (discord-bot-1325039818673094739) + +# '''Messages: +# 2025-10-08 22:08 : Hey, can you help me refactor a function? +# 2025-10-08 22:08 : It's a bit slow and hard to read. 
+# ''' + +#endblock + +#subagent main +#startblock type: action +#index 83 +#timestamp 1759961343.107705 +#time_remaining 19940.0 seconds +#block_size I have 768 tokens (full) to write with + +def action_send_discord_messages(self): + client = self.tools[self.get_cache('client_id')] + client.send_message(": Sure—paste the function, and I'll propose a clearer version.") + +self.add_action('Send Discord messages', action_send_discord_messages) +#endblock + +#startblock type: observation +#index 84 +#timestamp 1759961360.107705 +#time_remaining 19880.0 seconds +#block_size I have 768 tokens (full) to write with +#title WeaveDiscordClient (discord-bot-1325039818673094739) + +# '''Messages: +# 2025-10-08 22:09 : +# def f(a,b): +# c=0 +# for i in range(len(a)): +# c+=a[i]*b[i] +# return c +# ''' + +#endblock + +#subagent main +#startblock type: action +#index 85 +#timestamp 1759961377.107705 +#time_remaining 19820.0 seconds +#block_size I have 768 tokens (full) to write with + +def action_send_discord_messages(self): + client = self.tools[self.get_cache('client_id')] + client.send_message(": I recommend naming and iterating directly over pairs.") + client.send_message(": Also add type hints and guard clauses.") + +self.add_action('Send Discord messages', action_send_discord_messages) +#endblock + +#startblock type: observation +#index 86 +#timestamp 1759961394.107705 +#time_remaining 19760.0 seconds +#block_size I have 768 tokens (full) to write with +#title WeaveDiscordClient (discord-bot-1325039818673094739) + +# '''Messages: +# 2025-10-08 22:09 : Great—can you show me the final function? 
#!/usr/bin/env python3
"""Convert a role/text JSONL transcript into fake weave-agent blocks.

Minimal converter: JSONL (role, text) -> fake weave-agent blocks.

- Contiguous messages with the same role are grouped into a single "turn".
- user turns  => observation blocks containing a ``# '''Messages:`` transcript.
- agent turns => action blocks that send Discord messages via a fake client.
"""
import argparse
import itertools
import json
import sys
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Any

BLOCK_SIZE_TOKENS = 768
DEFAULT_TIME_REMAINING_START = 20000.0
DEFAULT_TIME_DECREMENT_PER_BLOCK = 60.0
# Seconds added to the block timestamp after each emitted block (arbitrary).
DEFAULT_TIMESTAMP_STEP = 17.0


def read_jsonl(path: Path) -> List[Dict[str, Any]]:
    """Read and validate a JSONL transcript.

    Blank lines are skipped. Every other line must be a JSON object whose
    ``role`` is ``'user'`` or ``'agent'`` and whose ``text`` is a string.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        One ``{'role': ..., 'text': ...}`` dict per non-blank line, in file
        order. Any extra keys present in the input objects are dropped.

    Raises:
        ValueError: If a line is not valid JSON, is not a JSON object, or
            has an invalid ``role`` / ``text`` value.
    """
    entries: List[Dict[str, Any]] = []
    with path.open('r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError as e:
                # Chain the decoder error so the traceback keeps its position info.
                raise ValueError(f"Invalid JSON on line: {line}\n{e}") from e
            if not isinstance(obj, dict):
                raise ValueError(f"JSONL entries must be objects: {line}")
            role = obj.get('role')
            text = obj.get('text')
            if role not in ('user', 'agent'):
                raise ValueError(f"role must be 'user' or 'agent': {obj}")
            if not isinstance(text, str):
                raise ValueError(f"text must be a string: {obj}")
            entries.append({'role': role, 'text': text})
    return entries


def group_contiguous_turns(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Merge runs of consecutive same-role messages into turns.

    Args:
        messages: Dicts with ``'role'`` and ``'text'`` keys, in order.

    Returns:
        One ``{'role': role, 'texts': [...]}`` dict per run of adjacent
        messages that share a role. An empty input yields an empty list.
    """
    return [
        {'role': role, 'texts': [m['text'] for m in run]}
        for role, run in itertools.groupby(messages, key=lambda m: m['role'])
    ]


def format_timestamp(ts: float) -> str:
    """Format an epoch timestamp as ``YYYY-MM-DD HH:MM`` in the local timezone."""
    dt = datetime.fromtimestamp(ts, tz=timezone.utc).astimezone()
    # Example format similar to sample: 2025-10-04 07:13
    return dt.strftime('%Y-%m-%d %H:%M')


def render_observation_block(index: int, ts: float, time_remaining: float, bot_title: str,
                             user_label: str, texts: List[str]) -> str:
    """Render one user turn as an observation block.

    Args:
        index: Value written to the ``#index`` header.
        ts: Epoch timestamp written to ``#timestamp`` and used to stamp
            every message line.
        time_remaining: Value written to the ``#time_remaining`` header.
        bot_title: Text written after ``#title``.
        user_label: Name shown in angle brackets before each message.
        texts: Messages of the turn, in order.

    Returns:
        The block as a single newline-joined string (no trailing newline).
    """
    lines: List[str] = [
        '#startblock type: observation',
        f'#index {index}',
        f'#timestamp {ts}',
        f'#time_remaining {time_remaining} seconds',
        f'#block_size I have {BLOCK_SIZE_TOKENS} tokens (full) to write with',
        f'#title {bot_title}',
        '',
        "# '''Messages:",
    ]
    stamp = format_timestamp(ts)
    for text in texts:
        # Split multi-line while keeping message grouping; an empty message
        # still produces one (empty) line.
        sublines = text.splitlines() or ['']
        if len(sublines) == 1:
            lines.append(f'# {stamp} <{user_label}>: {sublines[0]}')
        else:
            lines.append(f'# {stamp} <{user_label}>:')
            lines.extend(f'# {sl}' for sl in sublines)
    lines.extend(["# '''", '', '#endblock'])
    return '\n'.join(lines)


def escape_triple_quotes(s: str) -> str:
    """Backslash-escape every ``\"\"\"`` sequence in *s*.

    Kept for backward compatibility. It is not sufficient on its own for
    embedding text in a double-quoted literal, so ``render_action_block``
    uses ``_python_string_literal`` instead.
    """
    return s.replace('"""', '\\"\\"\\"')


def _python_string_literal(s: str) -> str:
    """Return *s* as a double-quoted string literal that is valid Python source.

    ``json.dumps`` escapes quotes, backslashes, newlines and other control
    characters using escape sequences that Python string literals also
    understand, and always uses double quotes, which matches the style of
    the sample output.
    """
    return json.dumps(s, ensure_ascii=False)


def render_action_block(index: int, ts: float, time_remaining: float, bot_name: str, texts: List[str]) -> str:
    """Render one agent turn as an action block that sends each message.

    The block defines ``action_send_discord_messages(self)`` with one
    ``client.send_message(...)`` call per message and registers it through
    ``self.add_action``.

    Args:
        index: Value written to the ``#index`` header.
        ts: Epoch timestamp written to the ``#timestamp`` header.
        time_remaining: Value written to the ``#time_remaining`` header.
        bot_name: Name shown in angle brackets at the start of each message.
        texts: Messages of the turn, in order.

    Returns:
        The block as a single newline-joined string (no trailing newline).
    """
    lines: List[str] = [
        '#subagent main',
        '#startblock type: action',
        f'#index {index}',
        f'#timestamp {ts}',
        f'#time_remaining {time_remaining} seconds',
        f'#block_size I have {BLOCK_SIZE_TOKENS} tokens (full) to write with',
        '',
        'def action_send_discord_messages(self):',
        "    client = self.tools[self.get_cache('client_id')]",
    ]
    for text in texts:
        # Escape the whole message (bot prefix included) so quotes,
        # backslashes and newlines cannot break the generated source.
        literal = _python_string_literal(f'<{bot_name}>: {text}')
        lines.append(f'    client.send_message({literal})')
    lines.extend([
        '',
        "self.add_action('Send Discord messages', action_send_discord_messages)",
        '#endblock',
    ])
    return '\n'.join(lines)


def convert(jsonl_path: Path, output_path: Path,
            bot_name: str, user_name: str,
            start_index: int, start_timestamp: float,
            title: str) -> None:
    """Convert a JSONL transcript file into a weave-agent block file.

    Args:
        jsonl_path: Input JSONL file (see ``read_jsonl``).
        output_path: File to write; written as UTF-8.
        bot_name: Name used for agent messages.
        user_name: Name used for user messages.
        start_index: ``#index`` of the first block; incremented per block.
        start_timestamp: Epoch timestamp of the first block; advanced by
            ``DEFAULT_TIMESTAMP_STEP`` per block.
        title: ``#title`` text for observation blocks.

    Raises:
        ValueError: Propagated from ``read_jsonl`` on malformed input.
    """
    msgs = read_jsonl(jsonl_path)
    turns = group_contiguous_turns(msgs)

    time_remaining = DEFAULT_TIME_REMAINING_START
    block_index = start_index
    ts = start_timestamp

    out_lines: List[str] = []
    for turn in turns:
        role = turn['role']
        texts = turn['texts']
        if role == 'user':
            out_lines.append(
                render_observation_block(
                    block_index, ts, time_remaining, title, user_name, texts
                )
            )
        else:
            out_lines.append(
                render_action_block(
                    block_index, ts, time_remaining, bot_name, texts
                )
            )
        block_index += 1
        ts += DEFAULT_TIMESTAMP_STEP
        # Never let the remaining-time counter go negative.
        time_remaining = max(0.0, time_remaining - DEFAULT_TIME_DECREMENT_PER_BLOCK)
        out_lines.append('')  # blank line between blocks

    # Strip the trailing separator and end the file with exactly one newline.
    output_path.write_text('\n'.join(out_lines).rstrip() + '\n', encoding='utf-8')


def main(argv: List[str]) -> int:
    """Command-line entry point.

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        ``0`` on success. Errors from reading or converting the input
        propagate as exceptions.
    """
    p = argparse.ArgumentParser(description='Convert role/text JSONL to weave-agent blocks (Discord style).')
    p.add_argument('--input', required=True, help='Path to input JSONL with role/text entries')
    p.add_argument('--output', required=True, help='Path to write weave-agent blocks text')
    p.add_argument('--bot-name', default='Weaver', help='Displayed bot name in messages')
    p.add_argument('--user-name', default='user', help='Displayed user name in observation messages')
    p.add_argument('--start-index', type=int, default=1, help='Starting block index')
    p.add_argument('--start-timestamp', type=float, default=None, help='Starting epoch timestamp (float)')
    p.add_argument('--bot-id', default='discord-bot-000000000000000000', help='Discord bot id shown in title')
    args = p.parse_args(argv)

    in_path = Path(args.input)
    out_path = Path(args.output)
    if args.start_timestamp is None:
        # No explicit start given: use the current time as the baseline.
        start_ts = datetime.now(tz=timezone.utc).timestamp()
    else:
        start_ts = args.start_timestamp  # argparse already converted it to float

    title = f"WeaveDiscordClient ({args.bot_id})"
    convert(in_path, out_path, args.bot_name, args.user_name, args.start_index, start_ts, title)
    return 0


if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))