Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion examples/zcr_main/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct sense_voice_params {
std::string openvino_encode_device = "CPU";
std::vector<std::string> fname_inp = {};
std::vector<std::string> fname_out = {};
std::string outfile = "";
};

static int sense_voice_has_coreml(void) {
Expand Down Expand Up @@ -107,6 +108,7 @@ static void sense_voice_print_usage(int /*argc*/, char **argv, const sense_voice
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " -itn, --use-itn [%-7s] use itn\n", params.use_itn ? "true" : "false");
fprintf(stderr, " -fout --outfile [%s] output file path\n", params.outfile.c_str());
fprintf(stderr, " --chunk_size [%-7lu] vad chunk size(ms)\n", params.chunk_size);
fprintf(stderr, " -mmc --min-mute-chunks [%-7lu] When consecutive chunks are identified as silence\n", params.min_mute_chunks);
fprintf(stderr, " -mnc --max-nomute-chunks [%-7lu] when the first non-silent chunk is too far away\n", params.max_nomute_chunks);
Expand Down Expand Up @@ -166,7 +168,7 @@ static bool sense_voice_params_parse(int argc, char **argv, sense_voice_params &
} else if (arg == "-of" || arg == "--output-file") {
params.fname_out.emplace_back(argv[++i]);
} else if (arg == "-np" || arg == "--no-prints") {
params.no_prints = false;
params.no_prints = true;
} else if (arg == "-l" || arg == "--language") {
params.language = sense_voice_param_turn_lowercase(argv[++i]);
} else if (arg == "--prompt") {
Expand Down Expand Up @@ -195,6 +197,8 @@ static bool sense_voice_params_parse(int argc, char **argv, sense_voice_params &
params.max_batch = std::stoi(argv[++i]);
} else if (arg == "--chunk_size") {
params.chunk_size = std::stoi(argv[++i]);
} else if (arg == "--outfile" || arg == "-fout") {
params.outfile = argv[++i];
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
sense_voice_print_usage(argc, argv, params);
Expand Down Expand Up @@ -385,6 +389,11 @@ int main(int argc, char **argv) {
exit(0);
}

if (!params.outfile.empty()) {
freopen(params.outfile.c_str(), "w", stdout);
params.use_prefix = false;
}

// sense-voice init

struct sense_voice_context_params cparams = sense_voice_context_default_params();
Expand Down
4 changes: 2 additions & 2 deletions sense-voice/csrc/sense-voice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -808,11 +808,11 @@ int sense_voice_batch_pcm_to_feature_with_state(struct sense_voice_context *ctx,
state->feature.frame_size,
state->feature.frame_step,
state->feature.n_mel,
n_threads, true, cmvn, state->feature);
n_threads, false, cmvn, state->feature);
state->feature.input_data.insert(state->feature.input_data.end(), state->feature.data.begin(), state->feature.data.end());
}

state->t_feature_us = ggml_time_us() - t_start_us;
state->t_feature_us += ggml_time_us() - t_start_us;

// set input
{
Expand Down
Loading