Skip to content

Commit 7d2b4b2

Browse files
llm.omc: add best_of_n, parallel_cot, improve_until, debate
- best_of_n(prompt, n, criteria, system?, model?): parallel batch_llm_call + llm_judge - parallel_cot(prompt, n, model?): N-way CoT fired via batch_llm_call, majority vote - improve_until(text, criteria, threshold, max_rounds, model?): convergence loop with llm_judge - debate(topic, rounds, model?): two-agent argue + judge pattern Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 06420ca commit 7d2b4b2

1 file changed

Lines changed: 148 additions & 0 deletions

File tree

examples/lib/llm.omc

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,3 +271,151 @@ fn gen_and_test(description, test_fn, model, max_attempts) {
271271
}
272272
return {code: code, passed: false, attempts: max_att}
273273
}
274+
275+
# ── Best-of-N with LLM judge ────────────────────────────────────────────────
276+
277+
# best_of_n(prompt, n, criteria, system?, model?) → {answer, score, idx}
# Sends the same prompt N times through one batch_llm_call, scores every
# response with a single llm_judge call, and returns the highest-scoring one.
#   prompt   — prompt text used for every candidate
#   n        — number of candidates to request
#   criteria — forwarded unchanged to llm_judge
#   system   — optional system prompt; resolved through _default(system, null)
#   model    — optional model; resolved through _default(model, null)
# Returns:
#   answer — responses[idx] for the winning judge entry
#   score  — that entry's "score" (-1.0 if the judge returned no entries)
#   idx    — that entry's "idx" (0 if the judge returned no entries)
# When the batch produces no responses at all (e.g. n is 0) the result is
# {answer: null, score: -1.0, idx: -1} instead of indexing an empty array.
# NOTE(review): relies on llm_judge returning entries shaped {idx, score},
# where idx is a position in the responses array — confirm against llm_judge.
fn best_of_n(prompt, n, criteria, system, model) {
    h m = _default(model, null)
    h sys = _default(system, null)
    h prompts = arr_fill({prompt: prompt, system: sys}, n)
    h responses = batch_llm_call(prompts, m)

    # Nothing to judge: bail out before touching responses[0].
    if arr_len(responses) == 0 {
        return {answer: null, score: -1.0, idx: -1}
    }

    h scores = llm_judge(responses, criteria, m)
    h best_score = -1.0
    h best_idx = 0
    h i = 0
    while i < arr_len(scores) {
        # Strict '>' keeps the earliest entry when scores tie.
        if scores[i]["score"] > best_score {
            best_score = scores[i]["score"]
            best_idx = scores[i]["idx"]
        }
        i = i + 1
    }
    return {answer: responses[best_idx], score: best_score, idx: best_idx}
}
297+
298+
# ── Parallel Self-Consistency CoT ───────────────────────────────────────────
299+
300+
# parallel_cot(prompt, n, model?) → {answer, confidence, all_answers, votes}
# Like cot_verify but issues all N chain-of-thought calls through one
# batch_llm_call, then takes a majority vote over the extracted answers.
#   prompt — the question to answer
#   n      — number of reasoning samples to request
#   model  — optional model; resolved through _default(model, null)
# Answer extraction: the text after the LAST "ANSWER:" marker in a response,
# trimmed; if the marker is absent the whole trimmed response is used. Each
# answer is cut to its first 100 characters before voting, so votes are
# counted on exact (truncated) string equality.
# Returns:
#   answer      — the answer with the most votes ("" if there were no responses)
#   confidence  — winning vote count divided by the number of responses
#                 actually tallied (0.0 if there were none)
#   all_answers — extracted answers in response order
#   votes       — dict mapping each distinct answer to its vote count
fn parallel_cot(prompt, n, model) {
    h m = _default(model, null)
    h sys = "Think step by step. After reasoning, write 'ANSWER:' followed by your final answer on its own line."
    h full = str_concat("Question: ", prompt, "\n\nReason carefully, then write ANSWER: <your answer>")
    h prompts = arr_fill({prompt: full, system: sys}, n)
    h raw_responses = batch_llm_call(prompts, m)
    h votes = {}
    h all_answers = []
    h i = 0
    while i < arr_len(raw_responses) {
        h parts = str_split(raw_responses[i], "ANSWER:")
        h ans = ""
        if arr_len(parts) >= 2 {
            # Use the last marker so an "ANSWER:" quoted mid-reasoning is ignored.
            ans = str_trim(parts[arr_len(parts) - 1])
        } else {
            ans = str_trim(raw_responses[i])
        }
        h short_ans = str_slice(ans, 0, 100)
        arr_push(all_answers, short_ans)
        if dict_has(votes, short_ans) {
            votes[short_ans] = votes[short_ans] + 1
        } else {
            votes[short_ans] = 1
        }
        i = i + 1
    }
    h best = ""
    h best_count = 0
    h keys = dict_keys(votes)
    h k = 0
    while k < arr_len(keys) {
        if votes[keys[k]] > best_count {
            best = keys[k]
            best_count = votes[keys[k]]
        }
        k = k + 1
    }
    # Divide by the number of votes actually cast rather than the requested n:
    # this avoids dividing by zero and stays correct if the batch returned a
    # different number of responses. The 1.0 factor forces a fractional result
    # in case '/' on two integers truncates.
    h total = arr_len(all_answers)
    h confidence = 0.0
    if total > 0 {
        confidence = (best_count * 1.0) / total
    }
    return {answer: best, confidence: confidence, all_answers: all_answers, votes: votes}
}
341+
342+
# ── Improve Until Convergence ───────────────────────────────────────────────
343+
344+
# improve_until(text, criteria, threshold, max_rounds, model?) → {text, score, rounds}
# Repeatedly asks the model for an improved version of the text and has
# llm_judge score the current text against the candidate; the candidate
# replaces the current text only when it scores strictly higher. Stops as soon
# as the kept text's score reaches the threshold, or after max_rounds rounds.
#   text       — starting text
#   criteria   — improvement/judging criteria (used in the prompt and by llm_judge)
#   threshold  — score at which the loop stops early (score >= threshold)
#   max_rounds — upper bound on improvement rounds
#   model      — optional model; resolved through _default(model, null)
# Returns:
#   text   — the best text kept so far
#   score  — the score of that text from the last judged round
#            (0.0 if no round ran, i.e. max_rounds < 1)
#   rounds — the number of rounds actually executed (0..max_rounds)
# NOTE(review): relies on llm_judge returning entries shaped {idx, score},
# with idx 0 = current text and idx 1 = candidate — confirm against llm_judge.
fn improve_until(text, criteria, threshold, max_rounds, model) {
    h m = _default(model, null)
    h current = text
    h rounds_done = 0
    h score = 0.0
    h sys = "Improve the following text according to the given criteria. Return only the improved version."
    while rounds_done < max_rounds {
        # Count the round up front so the total is right on both exit paths
        # (early break on threshold, and running out of rounds).
        rounds_done = rounds_done + 1
        h improve_prompt = str_concat(
            "Improve this based on: ", criteria, "\n\n", current
        )
        h candidate = llm_call(improve_prompt, m, sys)
        h judgment = llm_judge([current, candidate], criteria, m)
        h candidate_score = 0.0
        h original_score = 0.0
        h ji = 0
        while ji < arr_len(judgment) {
            # Match on idx rather than position: judge entries may be reordered.
            if judgment[ji]["idx"] == 0 { original_score = judgment[ji]["score"] }
            if judgment[ji]["idx"] == 1 { candidate_score = judgment[ji]["score"] }
            ji = ji + 1
        }
        if candidate_score > original_score {
            current = candidate
            score = candidate_score
        } else {
            # Candidate was not better: keep the current text and its score.
            score = original_score
        }
        if score >= threshold { break }
    }
    return {text: current, score: score, rounds: rounds_done}
}
377+
378+
# ── Debate: two agents argue, judge picks winner ────────────────────────────
379+
380+
# debate(topic, rounds, model?) → {winner: "for"|"against"|"tie", reasoning, transcript}
# Runs a two-sided debate: each round, a FOR agent and an AGAINST agent are
# called together through batch_llm_call, each seeing the topic plus the other
# round's most recent FOR/AGAINST arguments. A judge call then picks a winner
# based on the final round's arguments.
#   topic  — the proposition being debated
#   rounds — number of argument rounds to run
#   model  — optional model; resolved through _default(model, null)
# Returns:
#   winner     — the judge's "winner" field; "tie" when the judge's output
#                could not be parsed as JSON or when no round was run
#   reasoning  — the judge's "reasoning" field; the raw judge output when it
#                could not be parsed
#   transcript — list of {round, for, against}, one entry per round
fn debate(topic, rounds, model) {
    h m = _default(model, null)
    h for_sys = str_concat("You argue FOR: ", topic, ". Be concise and compelling.")
    h against_sys = str_concat("You argue AGAINST: ", topic, ". Be concise and compelling.")
    h for_args = []
    h against_args = []
    h transcript = []
    h r = 0
    while r < rounds {
        h context = str_concat("Topic: ", topic)
        if arr_len(for_args) > 0 {
            # From round 2 on, show both sides the previous round's arguments.
            context = str_concat(context, "\n\nPrevious FOR arguments: ", for_args[arr_len(for_args) - 1])
            context = str_concat(context, "\nPrevious AGAINST arguments: ", against_args[arr_len(against_args) - 1])
        }
        h both = batch_llm_call([
            {prompt: context, system: for_sys},
            {prompt: context, system: against_sys}
        ], m)
        arr_push(for_args, both[0])
        arr_push(against_args, both[1])
        arr_push(transcript, {round: r + 1, for: both[0], against: both[1]})
        r = r + 1
    }

    # With rounds < 1 there are no arguments to judge; return a tie instead of
    # indexing the empty argument lists at position -1.
    if arr_len(transcript) == 0 {
        return {
            winner: "tie",
            reasoning: "No debate rounds were run.",
            transcript: transcript
        }
    }

    h judge_sys = "You are an impartial debate judge. Output JSON: {\"winner\": \"for\" or \"against\", \"reasoning\": \"...\"}"
    h judge_prompt = str_concat(
        "Topic: ", topic, "\n\n",
        "FOR argument:\n", for_args[arr_len(for_args) - 1], "\n\n",
        "AGAINST argument:\n", against_args[arr_len(against_args) - 1], "\n\n",
        "Who made the stronger case?"
    )
    h verdict_raw = llm_call(judge_prompt, m, judge_sys)
    h verdict = extract_json(verdict_raw)
    if verdict == null {
        # Unparseable verdict: report a tie and surface the raw judge text.
        verdict = {winner: "tie", reasoning: verdict_raw}
    }
    return {
        winner: verdict["winner"],
        reasoning: verdict["reasoning"],
        transcript: transcript
    }
}

0 commit comments

Comments
 (0)