Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dataflow/cli_funcs/cli_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path:
return script_path

return None
except:
except Exception:
return None


Expand Down Expand Up @@ -400,7 +400,7 @@ def cli_pdf2model_train(lf_yaml: str = ".cache/train_config.yaml", cache_path: s
with open(config_path_obj, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
actual_output_dir = config.get('output_dir', 'unknown')
except:
except Exception:
actual_output_dir = 'unknown'

print("Training completed successfully!")
Expand Down
8 changes: 4 additions & 4 deletions dataflow/cli_funcs/cli_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_dataflow_script_path(script_name: str) -> Path:
return script_path

return None
except:
except Exception:
return None


Expand Down Expand Up @@ -448,7 +448,7 @@ def cli_text2model_train(input_keys: str = None, lf_yaml: str = "./.cache/train_
file_size = qa_file.stat().st_size
print(
f"{Fore.GREEN}✅ Step 3 completed: {sample_count} training samples ({file_size} bytes){Style.RESET_ALL}")
except:
except Exception:
print(f"{Fore.GREEN}✅ Step 3 completed{Style.RESET_ALL}")

# Step 4: Training
Expand Down Expand Up @@ -506,7 +506,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o
with open(config_path_obj, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
actual_output_dir = config.get('output_dir', 'unknown')
except:
except Exception:
actual_output_dir = 'unknown'

print("Text2QA training completed successfully!")
Expand Down Expand Up @@ -552,7 +552,7 @@ def _run_text2qa_workflow(current_dir: Path, cache_path_obj: Path, config_path_o
# with open(config_path_obj, 'r', encoding='utf-8') as f:
# config = yaml.safe_load(f)
# actual_output_dir = config.get('output_dir', 'unknown')
# except:
# except Exception:
# actual_output_dir = 'unknown'

# print("Text2QA training completed successfully!")
Expand Down
4 changes: 2 additions & 2 deletions dataflow/operators/core_text/eval/bench_dataset_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ def __init__(self,
def math_verify_compare(self, answer, ground_truth):
try:
return verify(parse(str(ground_truth)), parse(str(answer)))
except:
except Exception:
try:
return verify(parse(ground_truth), parse(answer))
except:
except Exception:
return False

def ResolveResponse(self, response):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ def __init__(self,
def math_verify_compare(self, answer, ground_truth):
try:
return verify(parse(str(ground_truth)), parse(str(answer)))
except:
except Exception:
try:
return verify(parse(ground_truth), parse(answer))
except:
except Exception:
return False

def ResolveResponse(self, response):
Expand Down
8 changes: 4 additions & 4 deletions dataflow/operators/pdf2vqa/generate/llm_output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"):
for id in id_list:
try:
int(id)
except:
except Exception:
continue
if int(id) < len(input_json):
try:
item = input_json[int(id)]
except:
except Exception:
continue
if 'text' in item:
texts.append(item['text'])
Expand All @@ -55,13 +55,13 @@ def _id_to_text(self, input_ids, input_json, image_prefix="images"):
img_name = os.path.basename(img_path)
new_path = f"{image_prefix}/{img_name}"
texts.append(f"![{' '.join(item.get('image_caption','image'))}]({new_path})")
except:
except Exception:
pass
elif item.get('type','') == 'list':
if item['sub_type'] == 'text':
try:
texts.append(input_json[int(id)]['list_items'].pop(0))
except:
except Exception:
pass
return '\n'.join(texts)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def run(self, storage: DataFlowStorage,
md_path = Path(row[input_markdown_path_key])
try:
input_json_path = list(md_path.parent.glob("*_content_list.json"))[0]
except:
except Exception:
raise ValueError("No _content_list.json file found in the api result. There might be an error with the Mineru api.")

converted_path = str(input_json_path).replace('.json', '_converted.json')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def exact_compare(self, answer, ground_truth):
def math_verify_compare(self, answer, ground_truth):
try:
return verify(parse(str(ground_truth)), parse(str(answer)))
except:
except Exception:
try:
return verify(parse(ground_truth), parse(answer))
except:
except Exception:
return False

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _reformat_prompt(self, dataframe):
try:
used_prompt = self.prompts.build_prompt(question=question)
formatted_prompts.append(used_prompt.strip())
except:
except Exception:
self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")

return formatted_prompts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def parse_value(self, toks, start_idx, tables_with_alias, schema, default_tables
try:
val = float(toks[idx])
idx += 1
except:
except Exception:
end_idx = idx
while end_idx < len_ and toks[end_idx] not in (',', ')', 'and', *self.CLAUSE_KEYWORDS, *self.JOIN_KEYWORDS):
end_idx += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,5 @@ def get_perplexity_and_embedding_part_text(tokenizer, model, text, target_span,

return perplexity.to('cpu').item(), loss.to('cpu').item()

except:
except Exception:
return 0, 0
2 changes: 1 addition & 1 deletion dataflow/pipeline/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _get_op_node_str(self, node:OperatorNode):

try:
import networkx
except:
except Exception:
raise ImportError("Please install networkx to draw graph. Please run `pip install networkx[default]`.")
import matplotlib.pyplot as plt
G = networkx.DiGraph()
Expand Down
6 changes: 3 additions & 3 deletions dataflow/prompts/reasoning/diy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, prompt_template):
def build_prompt(self, question: str) -> str:
try:
return self.prompt_template + question + r'''Your response must start directly with "Solution:" without any preamble. Finish your response immediately after the solution.'''
except:
except Exception:
self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")

@PROMPT_REGISTRY.register()
Expand All @@ -25,7 +25,7 @@ def __init__(self, prompt_template):
def build_prompt(self, question: str) -> str:
try:
return self.prompt_template.format(question=question)
except:
except Exception:
self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")

@PROMPT_REGISTRY.register()
Expand All @@ -37,5 +37,5 @@ def __init__(self, prompt_template):
def build_prompt(self, question: str) -> str:
try:
return self.prompt_template.format(question=question)
except:
except Exception:
self.logger.debug(f"Please check if the symbol {{question}} in prompt is missing.")
2 changes: 1 addition & 1 deletion dataflow/serving/local_model_llm_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def generate_from_input(self,
try:
from vllm import SamplingParams
from vllm.sampling_params import GuidedDecodingParams
except:
except Exception:
raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'")

guided_decoding_params = GuidedDecodingParams(
Expand Down
2 changes: 1 addition & 1 deletion dataflow/serving/localhost_llm_api_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def format_response(self, response: dict) -> str:

try:
reasoning_content = response['choices'][0]["message"]["reasoning_content"]
except:
except Exception:
reasoning_content = ""

if reasoning_content != "":
Expand Down
2 changes: 1 addition & 1 deletion dataflow/serving/localmodel_lalm_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def start_serving(self):
# vLLM requires the multiprocessing method to be set to spawn
try:
from vllm import LLM,SamplingParams
except:
except Exception:
raise ImportError("please install vllm first like 'pip install open-dataflow[vllm]'")
# Set the environment variable for vllm to use spawn method for multiprocessing
# See https://docs.vllm.ai/en/v0.7.1/design/multiprocessing.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _clean_json_block(item: Any) -> str:
def safe_parse_json(text: Any) -> Any:
try:
return json.loads(_clean_json_block(text))
except:
except Exception:
return {}

def normalize_answer(s: str) -> str:
Expand Down
6 changes: 3 additions & 3 deletions dataflow/utils/pdf2vqa/format_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def refine_title(title: str, strict_title_match=False):
try:
# 其次提取中文数字章节编号(如六、二十四等)
new_title = re.search(r'[一二三四五六七八九零十百]+', title).group()
except:
except Exception:
new_title = title
title = new_title
return title
Expand Down Expand Up @@ -46,7 +46,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False):

try:
data["label"] = int(data["label"])
except:
except Exception:
continue

if data["chapter_title"] != "" and data["chapter_title"] != chapter_title:
Expand Down Expand Up @@ -87,7 +87,7 @@ def merge_qa_pair(vqa_jsonl, output_jsonl, strict_title_match=False):

try:
data["label"] = int(data["label"])
except:
except Exception:
continue

if data["chapter_title"] != "" and data["chapter_title"] != chapter_title:
Expand Down
4 changes: 2 additions & 2 deletions dataflow/utils/reasoning/AnswerExtraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _fix_a_slash_b(string):
a, b = int(a) if "sqrt" not in a else a, int(b) if "sqrt" not in b else b
assert string == f"{a}/{b}"
return f"\\frac{{{a}}}{{{b}}}"
except:
except Exception:
return string

@staticmethod
Expand All @@ -62,7 +62,7 @@ def convert_word_number(text: str) -> str:
"""
try:
return str(w2n.word_to_num(text))
except:
except Exception:
return text


Expand Down
2 changes: 1 addition & 1 deletion dataflow/utils/reasoning/CategoryFuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def category_hasher(self,primary: str, secondary: str) -> float:
k = self.primary_categories.index(primary)
m = self.secondary_categories[primary].index(secondary)
return k * 8 + m
except:
except Exception:
return 170

def category_hasher_reverse(self,hash: float) -> tuple[str, str]:
Expand Down
6 changes: 3 additions & 3 deletions dataflow/utils/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def clean_surrogates(obj):
else:
try:
return clean_surrogates(str(obj))
except:
except Exception:
return obj

if isinstance(data, list):
Expand Down Expand Up @@ -649,7 +649,7 @@ def clean_surrogates(obj):
# 其他类型(如自定义对象)尝试转为字符串处理
try:
return clean_surrogates(str(obj))
except:
except Exception:
# 如果转换失败,返回原对象或空字符串(根据需求选择)
return obj

Expand Down Expand Up @@ -1042,7 +1042,7 @@ def clean_surrogates(obj):
# 其他类型(如自定义对象)尝试转为字符串处理
try:
return clean_surrogates(str(obj))
except:
except Exception:
# 如果转换失败,返回原对象或空字符串(根据需求选择)
return obj

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def execute_query(self, connection: sqlite3.Connection, sql: str, params: Option
if cursor:
try:
cursor.close()
except:
except Exception:
pass

def explain_query(self, connection: sqlite3.Connection, sql: str, params: Optional[Tuple] = None) -> QueryResult:
Expand Down Expand Up @@ -370,7 +370,7 @@ def explain_query(self, connection: sqlite3.Connection, sql: str, params: Option
if cursor:
try:
cursor.close()
except:
except Exception:
pass

def _get_db_details(self, schema: Dict[str, Any]) -> str:
Expand Down
5 changes: 5 additions & 0 deletions dataflow/version.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
__version__ = '1.0.8'
short_version = __version__


def get_version():
"""Return the DataFlow version string."""
return __version__

def parse_version_info(version_str):
"""Parse a version string into a tuple.

Expand Down
2 changes: 1 addition & 1 deletion dataflow/webui/operator_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def gradio_run_pipeline(
preview = df.head(5).to_html(index=False, escape=True)
wrapped = "<style>table{table-layout:fixed;width:100%;}td,th{white-space:pre-wrap;}</style>" + preview
return summary, wrapped
except:
except Exception:
return f"**Pipeline执行完成** 输出文件: {out_fn}", "<pre>无法预览结果</pre>"
else:
return "**Pipeline执行完成** 但未找到输出文件", "<pre></pre>"
Expand Down