diff --git a/dataflow/cli_funcs/cli_pdf.py b/dataflow/cli_funcs/cli_pdf.py index 4b4d8f21..e1e327cf 100644 --- a/dataflow/cli_funcs/cli_pdf.py +++ b/dataflow/cli_funcs/cli_pdf.py @@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path: return script_path return None - except: + except Exception: return None @@ -400,7 +400,7 @@ def cli_pdf2model_train(lf_yaml: str = ".cache/train_config.yaml", cache_path: s with open(config_path_obj, 'r', encoding='utf-8') as f: config = yaml.safe_load(f) actual_output_dir = config.get('output_dir', 'unknown') - except: + except Exception: actual_output_dir = 'unknown' print("Training completed successfully!") diff --git a/dataflow/cli_funcs/cli_text.py b/dataflow/cli_funcs/cli_text.py index 05cbfcfa..efa77e87 100644 --- a/dataflow/cli_funcs/cli_text.py +++ b/dataflow/cli_funcs/cli_text.py @@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path: return script_path return None - except: + except Exception: return None @@ -448,7 +448,7 @@ def cli_text2model_train(input_keys: str = None, lf_yaml: str = "./.cache/train_ file_size = qa_file.stat().st_size print( f"{Fore.GREEN}✅ Step 3 completed: {sample_count} training samples ({file_size} bytes){Style.RESET_ALL}") - except: + except Exception: print(f"{Fore.GREEN}✅ Step 3 completed{Style.RESET_ALL}") # Step 4: Training @@ -506,7 +506,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o with open(config_path_obj, 'r', encoding='utf-8') as f: config = yaml.safe_load(f) actual_output_dir = config.get('output_dir', 'unknown') - except: + except Exception: actual_output_dir = 'unknown' print("Text2QA training completed successfully!") @@ -552,7 +552,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o # with open(config_path_obj, 'r', encoding='utf-8') as f: # config = yaml.safe_load(f) # actual_output_dir = config.get('output_dir', 'unknown') -# except: +# except Exception: # actual_output_dir = 'unknown' # print("Text2QA training completed successfully!") diff --git a/dataflow/operators/core_text/eval/bench_dataset_evaluator.py b/dataflow/operators/core_text/eval/bench_dataset_evaluator.py index 56f971cf..9c1a19c9 100644 --- a/dataflow/operators/core_text/eval/bench_dataset_evaluator.py +++ b/dataflow/operators/core_text/eval/bench_dataset_evaluator.py @@ -54,10 +54,10 @@ def __init__(self, def math_verify_compare(self, answer, ground_truth): try: return verify(parse(str(ground_truth)), parse(str(answer))) - except: + except Exception: try: return verify(parse(ground_truth), parse(answer)) - except: + except Exception: return False def ResolveResponse(self, response): diff --git a/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py b/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py index 5c23d10d..fc1199e5 100644 --- a/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py +++ b/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py @@ -61,10 +61,10 @@ def __init__(self, def math_verify_compare(self, answer, ground_truth): try: return verify(parse(str(ground_truth)), parse(str(answer))) - except: + except Exception: try: return verify(parse(ground_truth), parse(answer)) - except: + except Exception: return False def ResolveResponse(self, response): diff --git a/dataflow/operators/pdf2vqa/generate/llm_output_parser.py b/dataflow/operators/pdf2vqa/generate/llm_output_parser.py index 0f9aeec6..01fb3c44 100644 --- a/dataflow/operators/pdf2vqa/generate/llm_output_parser.py +++ b/dataflow/operators/pdf2vqa/generate/llm_output_parser.py @@ -40,12 +40,12 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"): for id in id_list: try: int(id) - except: + except Exception: continue if int(id) < len(input_json): try: item = input_json[int(id)] - except: + except Exception: continue if 'text' in item: texts.append(item['text']) @@ -55,13 +55,13 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"): img_name = os.path.basename(img_path) new_path = f"{image_prefix}/{img_name}" texts.append(f"") - except: + except Exception: pass elif item.get('type','') == 'list': if item['sub_type'] == 'text': try: texts.append(input_json[int(id)]['list_items'].pop(0)) - except: + except Exception: pass return '\n'.join(texts) diff --git a/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py b/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py index 9ffdc152..cc2e2155 100644 --- a/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py +++ b/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py @@ -62,7 +62,7 @@ def run(self, storage: DataFlowStorage, md_path = Path(row[input_markdown_path_key]) try: input_json_path = list(md_path.parent.glob("*_content_list.json"))[0] - except: + except Exception: raise ValueError("No _content_list.json file found in the api result. There might be an error with the Mineru api.") converted_path = str(input_json_path).replace('.json', '_converted.json') diff --git a/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py b/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py index 2d740992..46d67b94 100644 --- a/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py +++ b/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py @@ -29,10 +29,10 @@ def exact_compare(self, answer, ground_truth): def math_verify_compare(self, answer, ground_truth): try: return verify(parse(str(ground_truth)), parse(str(answer))) - except: + except Exception: try: return verify(parse(ground_truth), parse(answer)) - except: + except Exception: return False @staticmethod diff --git a/dataflow/operators/reasoning/generate/reasoning_question_generator.py b/dataflow/operators/reasoning/generate/reasoning_question_generator.py index fcd28dcd..db13b146 100644 --- a/dataflow/operators/reasoning/generate/reasoning_question_generator.py +++ b/dataflow/operators/reasoning/generate/reasoning_question_generator.py @@ -117,7 +117,7 @@ def _reformat_prompt(self, dataframe): try: used_prompt = self.prompts.build_prompt(question=question) formatted_prompts.append(used_prompt.strip()) - except: + except Exception: self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.") return formatted_prompts diff --git a/dataflow/operators/text2sql/eval/sql_component_classifier.py b/dataflow/operators/text2sql/eval/sql_component_classifier.py index 1c64ff5c..d85990ac 100644 --- a/dataflow/operators/text2sql/eval/sql_component_classifier.py +++ b/dataflow/operators/text2sql/eval/sql_component_classifier.py @@ -271,7 +271,7 @@ def parse_value(self, toks, start_idx, tables_with_alias, schema, default_tables try: val = float(toks[idx]) idx += 1 - except: + except Exception: end_idx = idx while end_idx < len_ and toks[end_idx] not in (',', ')', 'and', *self.CLAUSE_KEYWORDS, *self.JOIN_KEYWORDS): end_idx += 1 diff --git a/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py b/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py index 498563d0..26e72663 100644 --- a/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py +++ b/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py @@ -49,5 +49,5 @@ def get_perplexity_and_embedding_part_text(tokenizer, model, text, target_span, return perplexity.to('cpu').item(), loss.to('cpu').item() - except: + except Exception: return 0, 0 diff --git a/dataflow/pipeline/Pipeline.py b/dataflow/pipeline/Pipeline.py index f9644918..9748c4c8 100644 --- a/dataflow/pipeline/Pipeline.py +++ b/dataflow/pipeline/Pipeline.py @@ -213,7 +213,7 @@ def _get_op_node_str(self, node:OperatorNode): try: import networkx - except: + except Exception: raise ImportError("Please install networkx to draw graph. Please run `pip install networkx[default]`.") import matplotlib.pyplot as plt G = networkx.DiGraph() diff --git a/dataflow/prompts/reasoning/diy.py b/dataflow/prompts/reasoning/diy.py index e43fccb8..cd1adbdb 100644 --- a/dataflow/prompts/reasoning/diy.py +++ b/dataflow/prompts/reasoning/diy.py @@ -13,7 +13,7 @@ def __init__(self, prompt_template): def build_prompt(self, question: str) -> str: try: return self.prompt_template + question + r'''Your response must start directly with "Solution:" without any preamble. Finish your response immediately after the solution.''' - except: + except Exception: self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.") @PROMPT_REGISTRY.register() @@ -25,7 +25,7 @@ def __init__(self, prompt_template): def build_prompt(self, question: str) -> str: try: return self.prompt_template.format(question=question) - except: + except Exception: self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.") @PROMPT_REGISTRY.register() @@ -37,5 +37,5 @@ def __init__(self, prompt_template): def build_prompt(self, question: str) -> str: try: return self.prompt_template.format(question=question) - except: + except Exception: self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.") \ No newline at end of file diff --git a/dataflow/serving/local_model_llm_serving.py b/dataflow/serving/local_model_llm_serving.py index ad469d86..86e10fbf 100644 --- a/dataflow/serving/local_model_llm_serving.py +++ b/dataflow/serving/local_model_llm_serving.py @@ -152,7 +152,7 @@ def generate_from_input(self, try: from vllm import SamplingParams from vllm.sampling_params import GuidedDecodingParams - except: + except Exception: raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'") guided_decoding_params = GuidedDecodingParams( diff --git a/dataflow/serving/localhost_llm_api_serving.py b/dataflow/serving/localhost_llm_api_serving.py index efb598e8..eb82d301 100644 --- a/dataflow/serving/localhost_llm_api_serving.py +++ b/dataflow/serving/localhost_llm_api_serving.py @@ -153,7 +153,7 @@ def format_response(self, response: dict) -> str: try: reasoning_content = response['choices'][0]["message"]["reasoning_content"] - except: + except Exception: reasoning_content = "" if reasoning_content != "": diff --git a/dataflow/serving/localmodel_lalm_serving.py b/dataflow/serving/localmodel_lalm_serving.py index 3443ce35..8fcf12d1 100644 --- a/dataflow/serving/localmodel_lalm_serving.py +++ b/dataflow/serving/localmodel_lalm_serving.py @@ -175,7 +175,7 @@ def start_serving(self): # vLLM requires the multiprocessing method to be set to spawn try: from vllm import LLM,SamplingParams - except: + except Exception: raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'") # Set the environment variable for vllm to use spawn method for multiprocessing # See https://docs.vllm.ai/en/v0.7.1/design/multiprocessing.html diff --git a/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py b/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py index 0ee9c2ff..4ad6a3fd 100644 --- a/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py +++ b/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py @@ -48,7 +48,7 @@ def _clean_json_block(item: Any) -> str: def safe_parse_json(text: Any) -> Any: try: return json.loads(_clean_json_block(text)) - except: + except Exception: return {} def normalize_answer(s: str) -> str: diff --git a/dataflow/utils/pdf2vqa/format_utils.py b/dataflow/utils/pdf2vqa/format_utils.py index 778431f8..e567d0f5 100644 --- a/dataflow/utils/pdf2vqa/format_utils.py +++ b/dataflow/utils/pdf2vqa/format_utils.py @@ -13,7 +13,7 @@ def refine_title(title: str, strict_title_match=False): try: # 其次提取中文数字章节编号(如六、二十四等) new_title = re.search(r'[一二三四五六七八九零十百]+', title).group() - except: + except Exception: new_title = title title = new_title return title @@ -46,7 +46,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False): try: data["label"] = int(data["label"]) - except: + except Exception: continue if data["chapter_title"] != "" and data["chapter_title"] != chapter_title: @@ -87,7 +87,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False): try: data["label"] = int(data["label"]) - except: + except Exception: continue if data["chapter_title"] != "" and data["chapter_title"] != chapter_title: diff --git a/dataflow/utils/reasoning/AnswerExtraction.py b/dataflow/utils/reasoning/AnswerExtraction.py index 1ddbf4d9..3670d0f0 100644 --- a/dataflow/utils/reasoning/AnswerExtraction.py +++ b/dataflow/utils/reasoning/AnswerExtraction.py @@ -45,7 +45,7 @@ def _fix_a_slash_b(string): a, b = int(a) if "sqrt" not in a else a, int(b) if "sqrt" not in b else b assert string == f"{a}/{b}" return f"\\frac{{{a}}}{{{b}}}" - except: + except Exception: return string @staticmethod @@ -62,7 +62,7 @@ def convert_word_number(text: str) -> str: """ try: return str(w2n.word_to_num(text)) - except: + except Exception: return text diff --git a/dataflow/utils/reasoning/CategoryFuzz.py b/dataflow/utils/reasoning/CategoryFuzz.py index c01f4b43..efa2e02d 100644 --- a/dataflow/utils/reasoning/CategoryFuzz.py +++ b/dataflow/utils/reasoning/CategoryFuzz.py @@ -138,7 +138,7 @@ def category_hasher(self,primary: str, secondary: str) -> float: k = self.primary_categories.index(primary) m = self.secondary_categories[primary].index(secondary) return k * 8 + m - except: + except Exception: return 170 def category_hasher_reverse(self,hash: float) -> tuple[str, str]: diff --git a/dataflow/utils/storage.py b/dataflow/utils/storage.py index 10ea8447..ac0a82a9 100644 --- a/dataflow/utils/storage.py +++ b/dataflow/utils/storage.py @@ -325,7 +325,7 @@ def clean_surrogates(obj): else: try: return clean_surrogates(str(obj)) - except: + except Exception: return obj if isinstance(data, list): @@ -649,7 +649,7 @@ def clean_surrogates(obj): # 其他类型(如自定义对象)尝试转为字符串处理 try: return clean_surrogates(str(obj)) - except: + except Exception: # 如果转换失败,返回原对象或空字符串(根据需求选择) return obj @@ -1042,7 +1042,7 @@ def clean_surrogates(obj): # 其他类型(如自定义对象)尝试转为字符串处理 try: return clean_surrogates(str(obj)) - except: + except Exception: # 如果转换失败,返回原对象或空字符串(根据需求选择) return obj diff --git a/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py b/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py index 3dc8b3bd..467e5d2b 100644 --- a/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py +++ b/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py @@ -337,7 +337,7 @@ def execute_query(self, connection: sqlite3.Connection, sql: str, params: Option if cursor: try: cursor.close() - except: + except Exception: pass def explain_query(self, connection: sqlite3.Connection, sql: str, params: Optional[Tuple] = None) -> QueryResult: @@ -370,7 +370,7 @@ def explain_query(self, connection: sqlite3.Connection, sql: str, params: Option if cursor: try: cursor.close() - except: + except Exception: pass def _get_db_details(self, schema: Dict[str, Any]) -> str: diff --git a/dataflow/version.py b/dataflow/version.py index 8c06d18e..84816a6c 100644 --- a/dataflow/version.py +++ b/dataflow/version.py @@ -1,6 +1,11 @@ __version__ = '1.0.8' short_version = __version__ + +def get_version(): + """Return the DataFlow version string.""" + return __version__ + def parse_version_info(version_str): """Parse a version string into a tuple. diff --git a/dataflow/webui/operator_pipeline.py b/dataflow/webui/operator_pipeline.py index 017e5f3b..37055a94 100644 --- a/dataflow/webui/operator_pipeline.py +++ b/dataflow/webui/operator_pipeline.py @@ -469,7 +469,7 @@ def gradio_run_pipeline( preview = df.head(5).to_html(index=False, escape=True) wrapped = "" + preview return summary, wrapped - except: + except Exception: return f"**Pipeline执行完成** 输出文件: {out_fn}", "
无法预览结果" else: return "**Pipeline执行完成** 但未找到输出文件", ""