Skip to content

Commit fba29fc

Browse files
authored
refactor(compress): unify sync/async paths via generator (#5)
compressSync and compressAsync were identical (~180 lines each) except for their two summarize call sites. Their shared body now lives in a single compressGen generator that yields summarize requests and is driven by thin sync/async runners; compressSync and compressAsync remain as one-line wrappers around those runners. This removes a net 149 lines of duplication and makes no public API changes.
1 parent a823b1d commit fba29fc

1 file changed

Lines changed: 38 additions & 187 deletions

File tree

src/compress.ts

Lines changed: 38 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -614,190 +614,10 @@ function computeStats(
614614
}
615615

616616
// ---------------------------------------------------------------------------
617-
// Sync compression (internal)
617+
// Unified compression core (generator + sync/async runners)
618618
// ---------------------------------------------------------------------------
619619

620-
/**
 * Synchronous compression pass over a message list.
 *
 * Flow, as visible in this body:
 *  - An empty input returns an empty result with both ratios set to 1.
 *  - Exact-duplicate analysis runs unless `options.dedup` is false; fuzzy
 *    duplicate analysis runs only when `options.fuzzyDedup` is set
 *    (threshold defaults to 0.85) and its annotations are merged in.
 *  - Every message is classified, then handled in order: preserved messages
 *    are copied through; duplicates are replaced by a short `[cce:...]` tag;
 *    code-split messages get their prose summarized with code fences kept;
 *    everything else is grouped via `collectGroup` and summarized.
 *  - A compressed form is only emitted when it is strictly shorter than the
 *    content it replaces; otherwise the original message(s) are kept and
 *    counted as preserved.
 *
 * @param messages - Messages to compress.
 * @param options - Compression options; defaults used here are
 *   `sourceVersion` 0, `preserve` ['system'], `recencyWindow` 4, `dedup` true.
 * @returns The resulting messages, the stats from `computeStats`, and the
 *   `verbatim` record that is passed to every `buildCompressedMessage` call
 *   (presumably populated there with the original messages — confirm in that helper).
 */
function compressSync(messages: Message[], options: CompressOptions = {}): CompressResult {
621-
const sourceVersion = options.sourceVersion ?? 0;
622-
const counter = options.tokenCounter ?? defaultTokenCounter;
623-
624-
if (messages.length === 0) {
625-
return {
626-
messages: [],
627-
compression: {
628-
original_version: sourceVersion,
629-
ratio: 1,
630-
token_ratio: 1,
631-
messages_compressed: 0,
632-
messages_preserved: 0,
633-
},
634-
verbatim: {},
635-
};
636-
}
637-
638-
const preserveRoles = new Set(options.preserve ?? ['system']);
639-
const recencyWindow = options.recencyWindow ?? 4;
640-
// Index where the recency window starts; a non-positive window yields messages.length.
const recencyStart = Math.max(0, messages.length - (recencyWindow > 0 ? recencyWindow : 0));
641-
let dedupAnnotations =
642-
(options.dedup ?? true) ? analyzeDuplicates(messages, recencyStart, preserveRoles) : undefined;
643-
644-
if (options.fuzzyDedup) {
645-
const fuzzyAnnotations = analyzeFuzzyDuplicates(
646-
messages,
647-
recencyStart,
648-
preserveRoles,
649-
dedupAnnotations ?? new Map(),
650-
options.fuzzyThreshold ?? 0.85,
651-
);
652-
if (fuzzyAnnotations.size > 0) {
653-
if (!dedupAnnotations) dedupAnnotations = new Map();
654-
// Fuzzy annotations overwrite any existing annotation stored under the same index.
for (const [idx, ann] of fuzzyAnnotations) {
655-
dedupAnnotations.set(idx, ann);
656-
}
657-
}
658-
}
659-
660-
const classified = classifyAll(messages, preserveRoles, recencyWindow, dedupAnnotations);
661-
662-
const result: Message[] = [];
663-
const verbatim: Record<string, Message> = {};
664-
let messagesCompressed = 0;
665-
let messagesPreserved = 0;
666-
let messagesDeduped = 0;
667-
let messagesFuzzyDeduped = 0;
668-
let i = 0;
669-
670-
while (i < classified.length) {
671-
const { msg, preserved } = classified[i];
672-
673-
if (preserved) {
674-
result.push(msg);
675-
messagesPreserved++;
676-
i++;
677-
continue;
678-
}
679-
680-
// Dedup: replace earlier duplicate/near-duplicate with compact reference
681-
if (classified[i].dedup) {
682-
const annotation = classified[i].dedup!;
683-
const keepTargetId = messages[annotation.duplicateOfIndex].id;
684-
// A non-null similarity marks a fuzzy (near-duplicate) match; otherwise it is an exact duplicate.
const tag =
685-
annotation.similarity != null
686-
? `[cce:near-dup of ${keepTargetId}${annotation.contentLength} chars, ~${Math.round(annotation.similarity * 100)}% match]`
687-
: `[cce:dup of ${keepTargetId}${annotation.contentLength} chars]`;
688-
result.push(buildCompressedMessage(msg, [msg.id], tag, sourceVersion, verbatim, [msg]));
689-
if (annotation.similarity != null) {
690-
messagesFuzzyDeduped++;
691-
} else {
692-
messagesDeduped++;
693-
}
694-
i++;
695-
continue;
696-
}
697-
698-
// Code-split: extract fences verbatim, summarize surrounding prose
699-
if (classified[i].codeSplit) {
700-
const content = typeof msg.content === 'string' ? msg.content : '';
701-
const segments = splitCodeAndProse(content);
702-
const proseText = segments
703-
.filter((s) => s.type === 'prose')
704-
.map((s) => s.content)
705-
.join(' ');
706-
const codeFences = segments.filter((s) => s.type === 'code').map((s) => s.content);
707-
// Budget passed to summarize: 200 when the prose is shorter than 600 characters, else 400.
const proseBudget = proseText.length < 600 ? 200 : 400;
708-
const summaryText = summarize(proseText, proseBudget);
709-
const embeddedId = options.embedSummaryId ? makeSummaryId([msg.id]) : undefined;
710-
const compressed = `${formatSummary(summaryText, proseText, undefined, true, embeddedId)}\n\n${codeFences.join('\n\n')}`;
711-
712-
// Keep the original message when the compressed form is not strictly shorter.
if (compressed.length >= content.length) {
713-
result.push(msg);
714-
messagesPreserved++;
715-
i++;
716-
continue;
717-
}
718-
719-
result.push(
720-
buildCompressedMessage(msg, [msg.id], compressed, sourceVersion, verbatim, [msg]),
721-
);
722-
messagesCompressed++;
723-
i++;
724-
continue;
725-
}
726-
727-
// Collect consecutive non-preserved messages with the SAME role
728-
const { group, nextIdx } = collectGroup(classified, i);
729-
i = nextIdx;
730-
731-
const allContent = group
732-
.map((g) => (typeof g.msg.content === 'string' ? g.msg.content : ''))
733-
.join(' ');
734-
const contentBudget = allContent.length < 600 ? 200 : 400;
735-
const summaryText = isStructuredOutput(allContent)
736-
? summarizeStructured(allContent, contentBudget)
737-
: summarize(allContent, contentBudget);
738-
739-
if (group.length > 1) {
740-
const mergeIds = group.map((g) => g.msg.id);
741-
const embeddedId = options.embedSummaryId ? makeSummaryId(mergeIds) : undefined;
742-
let summary = formatSummary(summaryText, allContent, group.length, undefined, embeddedId);
743-
const combinedLength = group.reduce((sum, g) => sum + contentLength(g.msg), 0);
744-
// Second attempt: re-format with the fourth formatSummary argument set to true.
if (summary.length >= combinedLength) {
745-
summary = formatSummary(summaryText, allContent, group.length, true, embeddedId);
746-
}
747-
748-
// Still not shorter: keep every message of the group unchanged.
if (summary.length >= combinedLength) {
749-
for (const g of group) {
750-
result.push(g.msg);
751-
messagesPreserved++;
752-
}
753-
} else {
754-
const sourceMsgs = group.map((g) => g.msg);
755-
// The merged message is built from a shallow copy of the group's first message.
const base: Message = { ...sourceMsgs[0] };
756-
result.push(
757-
buildCompressedMessage(base, mergeIds, summary, sourceVersion, verbatim, sourceMsgs),
758-
);
759-
messagesCompressed += group.length;
760-
}
761-
} else {
762-
const single = group[0].msg;
763-
const content = typeof single.content === 'string' ? single.content : '';
764-
const embeddedId = options.embedSummaryId ? makeSummaryId([single.id]) : undefined;
765-
let summary = formatSummary(summaryText, allContent, undefined, undefined, embeddedId);
766-
if (summary.length >= content.length) {
767-
summary = formatSummary(summaryText, allContent, undefined, true, embeddedId);
768-
}
769-
770-
if (summary.length >= content.length) {
771-
result.push(single);
772-
messagesPreserved++;
773-
} else {
774-
result.push(
775-
buildCompressedMessage(single, [single.id], summary, sourceVersion, verbatim, [single]),
776-
);
777-
messagesCompressed++;
778-
}
779-
}
780-
}
781-
782-
return {
783-
messages: result,
784-
compression: computeStats(
785-
messages,
786-
result,
787-
messagesCompressed,
788-
messagesPreserved,
789-
sourceVersion,
790-
counter,
791-
messagesDeduped,
792-
messagesFuzzyDeduped,
793-
),
794-
verbatim,
795-
};
796-
}
797-
798-
// ---------------------------------------------------------------------------
799-
// Async compression (internal, LLM summarizer support)
800-
// ---------------------------------------------------------------------------
620+
/**
 * A request for one summary: the text to summarize and the budget to pass to
 * the summarizer. compressGen yields objects of this shape, and the code that
 * drives the generator sends the summary string back via `next()`.
 */
type SummarizeRequest = { text: string; budget: number };
801621

802622
async function withFallback(
803623
text: string,
@@ -816,13 +636,12 @@ async function withFallback(
816636
return summarize(text, maxBudget);
817637
}
818638

819-
async function compressAsync(
639+
function* compressGen(
820640
messages: Message[],
821641
options: CompressOptions = {},
822-
): Promise<CompressResult> {
642+
): Generator<SummarizeRequest, CompressResult, string> {
823643
const sourceVersion = options.sourceVersion ?? 0;
824644
const counter = options.tokenCounter ?? defaultTokenCounter;
825-
const userSummarizer = options.summarizer;
826645

827646
if (messages.length === 0) {
828647
return {
@@ -908,7 +727,7 @@ async function compressAsync(
908727
.join(' ');
909728
const codeFences = segments.filter((s) => s.type === 'code').map((s) => s.content);
910729
const proseBudget = proseText.length < 600 ? 200 : 400;
911-
const summaryText = await withFallback(proseText, userSummarizer, proseBudget);
730+
const summaryText: string = yield { text: proseText, budget: proseBudget };
912731
const embeddedId = options.embedSummaryId ? makeSummaryId([msg.id]) : undefined;
913732
const compressed = `${formatSummary(summaryText, proseText, undefined, true, embeddedId)}\n\n${codeFences.join('\n\n')}`;
914733

@@ -937,7 +756,7 @@ async function compressAsync(
937756
const contentBudget = allContent.length < 600 ? 200 : 400;
938757
const summaryText = isStructuredOutput(allContent)
939758
? summarizeStructured(allContent, contentBudget)
940-
: await withFallback(allContent, userSummarizer, contentBudget);
759+
: yield { text: allContent, budget: contentBudget };
941760

942761
if (group.length > 1) {
943762
const mergeIds = group.map((g) => g.msg.id);
@@ -998,6 +817,38 @@ async function compressAsync(
998817
};
999818
}
1000819

820+
/**
 * Drives a compression generator to completion synchronously.
 *
 * Each yielded SummarizeRequest is answered with `summarize(text, budget)`,
 * and that string is sent back into the generator as the result of its
 * `yield` expression.
 *
 * @param gen - Generator that yields summarize requests and returns the final result.
 * @returns The generator's return value once it reports `done`.
 */
function runCompressSync(gen: Generator<SummarizeRequest, CompressResult, string>): CompressResult {
821+
// The first next() takes no argument: it only runs the generator up to its first yield (or return).
let next = gen.next();
822+
while (!next.done) {
823+
const { text, budget } = next.value;
824+
next = gen.next(summarize(text, budget));
825+
}
826+
// Once done is true, value holds the generator's return value rather than a request.
return next.value;
827+
}
828+
829+
/**
 * Drives a compression generator to completion, resolving each request asynchronously.
 *
 * Each yielded SummarizeRequest is answered with the awaited result of
 * `withFallback(text, userSummarizer, budget)`. Requests are awaited one at a
 * time inside the loop, so the next request is not produced until the
 * previous summary has been sent back into the generator.
 *
 * @param gen - Generator that yields summarize requests and returns the final result.
 * @param userSummarizer - Optional summarizer forwarded to `withFallback`
 *   (NOTE(review): presumably `withFallback` uses the built-in `summarize`
 *   when this is absent or unusable — confirm in that helper).
 * @returns A promise for the generator's return value.
 */
async function runCompressAsync(
830+
gen: Generator<SummarizeRequest, CompressResult, string>,
831+
userSummarizer?: Summarizer,
832+
): Promise<CompressResult> {
833+
// The first next() takes no argument: it only runs the generator up to its first yield (or return).
let next = gen.next();
834+
while (!next.done) {
835+
const { text, budget } = next.value;
836+
next = gen.next(await withFallback(text, userSummarizer, budget));
837+
}
838+
// Once done is true, value holds the generator's return value rather than a request.
return next.value;
839+
}
840+
841+
/**
 * Synchronous compression entry point.
 *
 * Creates the shared `compressGen(messages, options)` generator and runs it
 * with `runCompressSync`.
 *
 * @param messages - Messages to compress.
 * @param options - Compression options, forwarded unchanged to `compressGen`.
 * @returns The result returned by the generator.
 */
function compressSync(messages: Message[], options: CompressOptions = {}): CompressResult {
842+
return runCompressSync(compressGen(messages, options));
843+
}
844+
845+
/**
 * Asynchronous compression entry point.
 *
 * Creates the shared `compressGen(messages, options)` generator and runs it
 * with `runCompressAsync`, passing `options.summarizer` as the summarizer
 * used to answer the generator's requests.
 *
 * @param messages - Messages to compress.
 * @param options - Compression options, forwarded unchanged to `compressGen`.
 * @returns A promise for the result returned by the generator.
 */
async function compressAsync(
846+
messages: Message[],
847+
options: CompressOptions = {},
848+
): Promise<CompressResult> {
849+
return runCompressAsync(compressGen(messages, options), options.summarizer);
850+
}
851+
1001852
// ---------------------------------------------------------------------------
1002853
// Token budget helpers (absorbed from compressToFit)
1003854
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)