OpenDCAI · SunnyHaze · Mar 2, 2026 · Feb 25, 2026 · Mar 2, 2026
diff --git a/dataflow/cli_funcs/cli_pdf.py b/dataflow/cli_funcs/cli_pdf.py
@@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path:
                 return script_path
 
         return None
-    except:
+    except Exception:
         return None
 
 
@@ -400,7 +400,7 @@ def cli_pdf2model_train(lf_yaml: str = ".cache/train_config.yaml", cache_path: s
             with open(config_path_obj, 'r', encoding='utf-8') as f:
                 config = yaml.safe_load(f)
                 actual_output_dir = config.get('output_dir', 'unknown')
-        except:
+        except Exception:
             actual_output_dir = 'unknown'
 
         print("Training completed successfully!")

diff --git a/dataflow/cli_funcs/cli_text.py b/dataflow/cli_funcs/cli_text.py
@@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path:
                 return script_path
 
         return None
-    except:
+    except Exception:
         return None
 
 
@@ -448,7 +448,7 @@ def cli_text2model_train(input_keys: str = None, lf_yaml: str = "./.cache/train_
             file_size = qa_file.stat().st_size
             print(
                 f"{Fore.GREEN}✅ Step 3 completed: {sample_count} training samples ({file_size} bytes){Style.RESET_ALL}")
-        except:
+        except Exception:
             print(f"{Fore.GREEN}✅ Step 3 completed{Style.RESET_ALL}")
 
         # Step 4: Training
@@ -506,7 +506,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o
         with open(config_path_obj, 'r', encoding='utf-8') as f:
             config = yaml.safe_load(f)
             actual_output_dir = config.get('output_dir', 'unknown')
-    except:
+    except Exception:
         actual_output_dir = 'unknown'
 
     print("Text2QA training completed successfully!")
@@ -552,7 +552,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o
 #         with open(config_path_obj, 'r', encoding='utf-8') as f:
 #             config = yaml.safe_load(f)
 #             actual_output_dir = config.get('output_dir', 'unknown')
-#     except:
+#     except Exception:
 #         actual_output_dir = 'unknown'
 
 #     print("Text2QA training completed successfully!")

diff --git a/dataflow/operators/core_text/eval/bench_dataset_evaluator.py b/dataflow/operators/core_text/eval/bench_dataset_evaluator.py
@@ -54,10 +54,10 @@ def __init__(self,
     def math_verify_compare(self, answer, ground_truth):
         try:
             return verify(parse(str(ground_truth)), parse(str(answer)))
-        except:
+        except Exception:
             try:
                 return verify(parse(ground_truth), parse(answer))
-            except:
+            except Exception:
                 return False
 
     def ResolveResponse(self, response):

diff --git a/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py b/dataflow/operators/core_text/eval/bench_dataset_evaluator_question.py
@@ -61,10 +61,10 @@ def __init__(self,
     def math_verify_compare(self, answer, ground_truth):
         try:
             return verify(parse(str(ground_truth)), parse(str(answer)))
-        except:
+        except Exception:
             try:
                 return verify(parse(ground_truth), parse(answer))
-            except:
+            except Exception:
                 return False
 
     def ResolveResponse(self, response):

diff --git a/dataflow/operators/pdf2vqa/generate/llm_output_parser.py b/dataflow/operators/pdf2vqa/generate/llm_output_parser.py
@@ -40,12 +40,12 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"):
         for id in id_list:
             try: 
                 int(id)
-            except:
+            except Exception:
                 continue
             if int(id) < len(input_json):
                 try:
                     item = input_json[int(id)]
-                except:
+                except Exception:
                     continue
                 if 'text' in item:
                     texts.append(item['text'])
@@ -55,13 +55,13 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"):
                         img_name = os.path.basename(img_path)
                         new_path = f"{image_prefix}/{img_name}"
                         texts.append(f"![{' '.join(item.get('image_caption','image'))}]({new_path})")
-                    except:
+                    except Exception:
                         pass
                 elif item.get('type','') == 'list':
                     if item['sub_type'] == 'text':
                         try:
                             texts.append(input_json[int(id)]['list_items'].pop(0))
-                        except:
+                        except Exception:
                             pass
         return '\n'.join(texts)
 

diff --git a/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py b/dataflow/operators/pdf2vqa/generate/mineru_to_llm_input_operator.py
@@ -62,7 +62,7 @@ def run(self, storage: DataFlowStorage,
             md_path = Path(row[input_markdown_path_key])
             try:
                 input_json_path = list(md_path.parent.glob("*_content_list.json"))[0]
-            except:
+            except IndexError:
                 raise ValueError("No _content_list.json file found in the api result. There might be an error with the Mineru api.")
 
             converted_path = str(input_json_path).replace('.json', '_converted.json')

diff --git a/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py b/dataflow/operators/reasoning/filter/reasoning_answer_groundtruth_filter.py
@@ -29,10 +29,10 @@ def exact_compare(self, answer, ground_truth):
     def math_verify_compare(self, answer, ground_truth):
         try:
             return verify(parse(str(ground_truth)), parse(str(answer)))
-        except:
+        except Exception:
             try:
                 return verify(parse(ground_truth), parse(answer))
-            except:
+            except Exception:
                 return False
 
     @staticmethod

diff --git a/dataflow/operators/reasoning/generate/reasoning_question_generator.py b/dataflow/operators/reasoning/generate/reasoning_question_generator.py
@@ -117,7 +117,7 @@ def _reformat_prompt(self, dataframe):
                     try:
                         used_prompt = self.prompts.build_prompt(question=question)
                         formatted_prompts.append(used_prompt.strip())
-                    except:
+                    except Exception:
                         self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")
 
         return formatted_prompts

diff --git a/dataflow/operators/text2sql/eval/sql_component_classifier.py b/dataflow/operators/text2sql/eval/sql_component_classifier.py
@@ -271,7 +271,7 @@ def parse_value(self, toks, start_idx, tables_with_alias, schema, default_tables
             try:
                 val = float(toks[idx])  
                 idx += 1
-            except:
+            except Exception:
                 end_idx = idx
                 while end_idx < len_ and toks[end_idx] not in (',', ')', 'and', *self.CLAUSE_KEYWORDS, *self.JOIN_KEYWORDS):
                     end_idx += 1

diff --git a/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py b/dataflow/operators/text_sft/eval/Superfiltering/data_analysis.py
@@ -49,5 +49,5 @@ def get_perplexity_and_embedding_part_text(tokenizer, model, text, target_span,
 
         return perplexity.to('cpu').item(), loss.to('cpu').item()
 
-    except:
+    except Exception:
         return 0, 0
diff --git a/dataflow/pipeline/Pipeline.py b/dataflow/pipeline/Pipeline.py
@@ -213,7 +213,7 @@ def _get_op_node_str(self, node:OperatorNode):
 
         try:
             import networkx
-        except:
+        except ImportError:
             raise ImportError("Please install networkx to draw graph. Please run `pip install networkx[default]`.")
         import matplotlib.pyplot as plt
         G = networkx.DiGraph()

diff --git a/dataflow/prompts/reasoning/diy.py b/dataflow/prompts/reasoning/diy.py
@@ -13,7 +13,7 @@ def __init__(self, prompt_template):
     def build_prompt(self, question: str) -> str:
         try:
             return self.prompt_template + question + r'''Your response must start directly with "Solution:" without any preamble. Finish your response immediately after the solution.'''
-        except:
+        except Exception:
             self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")
 
 @PROMPT_REGISTRY.register()
@@ -25,7 +25,7 @@ def __init__(self, prompt_template):
     def build_prompt(self, question: str) -> str:
         try:
             return self.prompt_template.format(question=question)
-        except:
+        except Exception:
             self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")
 
 @PROMPT_REGISTRY.register()
@@ -37,5 +37,5 @@ def __init__(self, prompt_template):
     def build_prompt(self, question: str) -> str:
         try:
             return self.prompt_template.format(question=question)
-        except:
+        except Exception:
             self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")
diff --git a/dataflow/serving/local_model_llm_serving.py b/dataflow/serving/local_model_llm_serving.py
@@ -152,7 +152,7 @@ def generate_from_input(self,
             try:
                 from vllm import SamplingParams
                 from vllm.sampling_params import GuidedDecodingParams
-            except:
+            except ImportError:
                 raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'")
 
             guided_decoding_params = GuidedDecodingParams(

diff --git a/dataflow/serving/localhost_llm_api_serving.py b/dataflow/serving/localhost_llm_api_serving.py
@@ -153,7 +153,7 @@ def format_response(self, response: dict) -> str:
 
         try:
             reasoning_content = response['choices'][0]["message"]["reasoning_content"]
-        except:
+        except (KeyError, IndexError, TypeError):
             reasoning_content = ""
 
         if reasoning_content != "":

diff --git a/dataflow/serving/localmodel_lalm_serving.py b/dataflow/serving/localmodel_lalm_serving.py
@@ -175,7 +175,7 @@ def start_serving(self):
         # vLLM requires the multiprocessing method to be set to spawn
         try:
             from vllm import LLM,SamplingParams
-        except:
+        except ImportError:
             raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'")
         # Set the environment variable for vllm to use spawn method for multiprocessing
         # See https://docs.vllm.ai/en/v0.7.1/design/multiprocessing.html 

diff --git a/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py b/dataflow/statics/pipelines/api_pipelines/agentic_rag_multihop_core_pipeline.py
@@ -48,7 +48,7 @@ def _clean_json_block(item: Any) -> str:
 def safe_parse_json(text: Any) -> Any:
     try:
         return json.loads(_clean_json_block(text))
-    except:
+    except Exception:
         return {} 
 
 def normalize_answer(s: str) -> str:

diff --git a/dataflow/utils/pdf2vqa/format_utils.py b/dataflow/utils/pdf2vqa/format_utils.py
@@ -13,7 +13,7 @@ def refine_title(title: str, strict_title_match=False):
             try:
                 # 其次提取中文数字章节编号（如六、二十四等）
                 new_title = re.search(r'[一二三四五六七八九零十百]+', title).group()   
-            except:
+            except Exception:
                 new_title = title
         title = new_title
     return title
@@ -46,7 +46,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False):
 
             try:
                 data["label"] = int(data["label"])
-            except:
+            except Exception:
                 continue
 
             if data["chapter_title"] != "" and data["chapter_title"] != chapter_title:
@@ -87,7 +87,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False):
 
             try:
                 data["label"] = int(data["label"])
-            except:
+            except Exception:
                 continue
 
             if data["chapter_title"] != "" and data["chapter_title"] != chapter_title:

diff --git a/dataflow/utils/reasoning/AnswerExtraction.py b/dataflow/utils/reasoning/AnswerExtraction.py
@@ -45,7 +45,7 @@ def _fix_a_slash_b(string):
             a, b = int(a) if "sqrt" not in a else a, int(b) if "sqrt" not in b else b
             assert string == f"{a}/{b}"
             return f"\\frac{{{a}}}{{{b}}}"
-        except:
+        except Exception:
             return string
 
     @staticmethod
@@ -62,7 +62,7 @@ def convert_word_number(text: str) -> str:
         """
         try:
             return str(w2n.word_to_num(text))
-        except:
+        except Exception:
             return text
 
 

diff --git a/dataflow/utils/reasoning/CategoryFuzz.py b/dataflow/utils/reasoning/CategoryFuzz.py
@@ -138,7 +138,7 @@ def category_hasher(self,primary: str, secondary: str) -> float:
             k = self.primary_categories.index(primary)
             m = self.secondary_categories[primary].index(secondary)
             return k * 8 + m
-        except:
+        except Exception:
             return 170
 
     def category_hasher_reverse(self,hash: float) -> tuple[str, str]:

diff --git a/dataflow/utils/storage.py b/dataflow/utils/storage.py
@@ -325,7 +325,7 @@ def clean_surrogates(obj):
             else:
                 try:
                     return clean_surrogates(str(obj))
-                except:
+                except Exception:
                     return obj
 
         if isinstance(data, list):
@@ -649,7 +649,7 @@ def clean_surrogates(obj):
                 # 其他类型（如自定义对象）尝试转为字符串处理
                 try:
                     return clean_surrogates(str(obj))
-                except:
+                except Exception:
                     # 如果转换失败，返回原对象或空字符串（根据需求选择）
                     return obj
 
@@ -1042,7 +1042,7 @@ def clean_surrogates(obj):
                 # 其他类型（如自定义对象）尝试转为字符串处理
                 try:
                     return clean_surrogates(str(obj))
-                except:
+                except Exception:
                     # 如果转换失败，返回原对象或空字符串（根据需求选择）
                     return obj
 

diff --git a/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py b/dataflow/utils/text2sql/database_connector/sqlite_vec_connector.py
@@ -337,7 +337,7 @@ def execute_query(self, connection: sqlite3.Connection, sql: str, params: Option
             if cursor:
                 try:
                     cursor.close()
-                except:
+                except Exception:
                     pass
 
     def explain_query(self, connection: sqlite3.Connection, sql: str, params: Optional[Tuple] = None) -> QueryResult:
@@ -370,7 +370,7 @@ def explain_query(self, connection: sqlite3.Connection, sql: str, params: Option
             if cursor:
                 try:
                     cursor.close()
-                except:
+                except Exception:
                     pass
 
     def _get_db_details(self, schema: Dict[str, Any]) -> str:

diff --git a/dataflow/version.py b/dataflow/version.py
@@ -1,6 +1,11 @@
 __version__ = '1.0.8'
 short_version = __version__
 
+
+def get_version():
+    """Return the DataFlow version string, i.e. the module-level ``__version__``."""
+    return __version__
+
 def parse_version_info(version_str):
     """Parse a version string into a tuple.
 

diff --git a/dataflow/webui/operator_pipeline.py b/dataflow/webui/operator_pipeline.py
@@ -469,7 +469,7 @@ def gradio_run_pipeline(
                 preview = df.head(5).to_html(index=False, escape=True)
                 wrapped = "<style>table{table-layout:fixed;width:100%;}td,th{white-space:pre-wrap;}</style>" + preview
                 return summary, wrapped
-            except:
+            except Exception:
                 return f"**Pipeline执行完成** 输出文件: {out_fn}", "<pre>无法预览结果</pre>"
         else:
             return "**Pipeline执行完成** 但未找到输出文件", "<pre></pre>"