From 36b3e6b7d2ba7c1e03b7ae19cc888b45ff00043c Mon Sep 17 00:00:00 2001
From: Karan Singh
Date: Sat, 28 Mar 2026 18:10:07 +0530
Subject: [PATCH] Add offline transcript auto edit cleanup

---
 speaktype/Services/WhisperService.swift       | 98 +++++++++++++++++++
 .../Views/Screens/Settings/SettingsView.swift | 61 ++++++++++++
 2 files changed, 159 insertions(+)

diff --git a/speaktype/Services/WhisperService.swift b/speaktype/Services/WhisperService.swift
index f19659f..e4645af 100644
--- a/speaktype/Services/WhisperService.swift
+++ b/speaktype/Services/WhisperService.swift
@@ -5,11 +5,15 @@ import WhisperKit
 class WhisperService {
     // Shared singleton instance - use this everywhere
     static let shared = WhisperService()
+    private static let autoEditEnabledKey = "enableAutoEdit"
+    private static let customReplacementRulesKey = "customReplacementRules"
     private static let placeholderPatterns = [
         #"\[(?:BLANK_AUDIO|SILENCE)\]"#,
         #"<\|nospeech\|>"#,
         #"\[\s*S\s*\]"#,
     ]
+    private static let fillerWordPattern =
+        #"(?i)(^|[\s,.;:!?])(?:uh+|um+|umm+|uhm+|erm+|hmm+)(?=$|[\s,.;:!?])[,.;:!?]?"#
     private static let noiseLabelTerms = [
         "applause",
         "background noise",
@@ -247,6 +251,100 @@ class WhisperService {
             options: .regularExpression
         )
 
+        normalized = applyAutoEdit(to: normalized)
+
         return normalized.trimmingCharacters(in: .whitespacesAndNewlines)
     }
+
+    /// A user-defined replacement rule: matches of `source` become `replacement`.
+    private struct AutoEditRule {
+        let source: String
+        let replacement: String
+    }
+
+    /// Strips filler words, applies the custom replacement rules, and tidies spacing.
+    ///
+    /// When the `enableAutoEdit` preference is off the text is only trimmed.
+    /// When it is on, every whitespace run (newlines included) collapses to one
+    /// space and whitespace before `,.;:!?` is removed.
+    private static func applyAutoEdit(to text: String) -> String {
+        guard UserDefaults.standard.bool(forKey: autoEditEnabledKey) else {
+            return text.trimmingCharacters(in: .whitespacesAndNewlines)
+        }
+
+        var edited = text.replacingOccurrences(
+            of: fillerWordPattern,
+            with: "$1",
+            options: .regularExpression
+        )
+
+        for rule in customReplacementRules() {
+            edited = replace(rule.source, with: rule.replacement, in: edited)
+        }
+
+        edited = edited.replacingOccurrences(
+            of: #"\s+([,.;:!?])"#,
+            with: "$1",
+            options: .regularExpression
+        )
+        edited = edited.replacingOccurrences(
+            of: #"\s+"#,
+            with: " ",
+            options: .regularExpression
+        )
+        return edited.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+
+    /// Parses the `customReplacementRules` preference, one rule per line.
+    ///
+    /// Separators are tried in the order `=>`, `->`, `=`; the line is split at the
+    /// first occurrence of the first separator it contains. The replacement may be
+    /// empty to delete a phrase. Lines with no separator or an empty source are ignored.
+    private static func customReplacementRules() -> [AutoEditRule] {
+        let rawRules = UserDefaults.standard.string(forKey: customReplacementRulesKey) ?? ""
+
+        return rawRules
+            .split(whereSeparator: \.isNewline)
+            .compactMap { rawLine in
+                let line = rawLine.trimmingCharacters(in: .whitespacesAndNewlines)
+                guard !line.isEmpty else { return nil }
+
+                for separator in ["=>", "->", "="] {
+                    let parts = line.components(separatedBy: separator)
+                    guard parts.count >= 2 else { continue }
+
+                    let source = parts[0].trimmingCharacters(in: .whitespacesAndNewlines)
+                    let replacement = parts[1...].joined(separator: separator)
+                        .trimmingCharacters(in: .whitespacesAndNewlines)
+
+                    guard !source.isEmpty else { return nil }
+                    return AutoEditRule(source: source, replacement: replacement)
+                }
+
+                return nil
+            }
+    }
+
+    /// Replaces every case-insensitive match of `source` in `text` with `replacement`.
+    ///
+    /// Each space in `source` matches any whitespace run, and a word boundary is
+    /// required wherever `source` begins or ends with a letter or digit.
+    private static func replace(_ source: String, with replacement: String, in text: String) -> String {
+        let escapedSource = NSRegularExpression.escapedPattern(for: source)
+            .replacingOccurrences(of: " ", with: #"\s+"#)
+        let needsLeadingBoundary = source.first?.isLetter == true || source.first?.isNumber == true
+        let needsTrailingBoundary = source.last?.isLetter == true || source.last?.isNumber == true
+        let pattern =
+            "\(needsLeadingBoundary ? #"\b"# : "")\(escapedSource)\(needsTrailingBoundary ? #"\b"# : "")"
+
+        guard let regex = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else {
+            return text
+        }
+
+        // The replacement is literal user text, so `$` and `\` must not be
+        // interpreted as template capture references or escapes.
+        let template = NSRegularExpression.escapedTemplate(for: replacement)
+        let range = NSRange(text.startIndex..., in: text)
+        return regex.stringByReplacingMatches(in: text, options: [], range: range, withTemplate: template)
+    }
 }
diff --git a/speaktype/Views/Screens/Settings/SettingsView.swift b/speaktype/Views/Screens/Settings/SettingsView.swift
index 256dd22..ae8a5c6 100644
--- a/speaktype/Views/Screens/Settings/SettingsView.swift
+++ b/speaktype/Views/Screens/Settings/SettingsView.swift
@@ -92,6 +92,8 @@ struct GeneralSettingsTab: View {
     @AppStorage("showMenuBarIcon") private var showMenuBarIcon: Bool = true
     @AppStorage("transcriptionLanguage") private var transcriptionLanguage: String = "auto"
     @AppStorage("recentTranscriptionLanguages") private var recentLanguagesString: String = ""
+    @AppStorage("enableAutoEdit") private var enableAutoEdit: Bool = false
+    @AppStorage("customReplacementRules") private var customReplacementRules: String = ""
 
     private var recentLanguageCodes: [String] {
         recentLanguagesString.split(separator: ",").map(String.init).filter { !$0.isEmpty }
@@ -210,6 +212,65 @@ struct GeneralSettingsTab: View {
                 }
             }
 
+            // Transcript Cleanup
+            SettingsSection {
+                SettingsSectionHeader(
+                    icon: "wand.and.stars",
+                    title: "Transcript Cleanup",
+                    subtitle: "Lightweight post-processing for dictation"
+                )
+
+                VStack(alignment: .leading, spacing: 14) {
+                    HStack {
+                        Text("Enable Auto Edit")
+                            .font(Typography.bodyMedium)
+                            .foregroundStyle(Color.textPrimary)
+                        Spacer()
+                        Toggle("", isOn: $enableAutoEdit)
+                            .labelsHidden()
+                    }
+
+                    Text(
+                        "Auto Edit removes common filler words like \"um\" and \"uh\" after transcription. It stays fully offline and does not rewrite the meaning of what you said."
+                    )
+                    .font(Typography.captionSmall)
+                    .foregroundStyle(Color.textMuted)
+
+                    VStack(alignment: .leading, spacing: 8) {
+                        Text("Custom replacements")
+                            .font(Typography.bodyMedium)
+                            .foregroundStyle(Color.textPrimary)
+
+                        ZStack(alignment: .topLeading) {
+                            RoundedRectangle(cornerRadius: 10)
+                                .fill(Color.bgHover)
+
+                            if customReplacementRules.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+                                Text("teh => the\nspeak type => SpeakType\nuh huh =>")
+                                    .font(.system(size: 12, design: .monospaced))
+                                    .foregroundStyle(Color.textMuted)
+                                    .padding(.horizontal, 12)
+                                    .padding(.vertical, 10)
+                                    .allowsHitTesting(false)
+                            }
+
+                            TextEditor(text: $customReplacementRules)
+                                .font(.system(size: 12, design: .monospaced))
+                                .scrollContentBackground(.hidden)
+                                .padding(.horizontal, 8)
+                                .padding(.vertical, 6)
+                        }
+                        .frame(minHeight: 110)
+                        .opacity(enableAutoEdit ? 1.0 : 0.65)
+
+                        Text("One rule per line using `from => to`. Leave the right side blank to delete a phrase.")
+                            .font(Typography.captionSmall)
+                            .foregroundStyle(Color.textMuted)
+                    }
+                    .disabled(!enableAutoEdit)
+                }
+            }
+
             // Spoken Language
             SettingsSection {
                 SettingsSectionHeader(