Excel-AI-project/ai_connect.py at main · firstsmile-dev/Excel-AI-project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
"""
ai_connect.py - Uses OpenAI API to process macro output JSON.
Refactored for better error handling, logging, and clarity.
"""

import json
import os
import logging
from typing import Any
import glob

from dotenv import load_dotenv
from openai import APIError, AuthenticationError, OpenAI

# Load environment variables from a .env file so that the
# os.getenv("OPENAI_API_KEY") lookup in edit_json_with_openai can see a key
# stored there.
load_dotenv()

# Configure logging at import time: records at INFO level and above are
# emitted, each prefixed with a timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)


def edit_json_with_openai(
    json_path: str,
    model: str = "gpt-4.1-mini",
    api_key: str | None = None,
) -> Any:
    """
    Send JSON data to OpenAI for processing and return the edited result.
    Handles API key retrieval, error handling, and logging.
    """
    # Get API key from parameter, .env file, environment variable, or raise error
    if api_key:
        api_key_value = api_key
    else:
        api_key_value = os.getenv("OPENAI_API_KEY")
        if not api_key_value:
            logging.error("OpenAI API key not provided. ")
            raise ValueError(
                "set OPENAI_API_KEY in your .env file or environment variable."
            )

    # Initialize OpenAI client
    client = OpenAI(api_key=api_key_value)

    # Load data
    try:
        with open(json_path, "r", encoding="utf-8") as file_handle:
            data = json.load(file_handle)
        logging.info(f"Loaded JSON data from {json_path}")
    except FileNotFoundError as exc:
        logging.error(f"err JSON file not found: {json_path}")
        raise FileNotFoundError(f"JSON file not found: {json_path}") from exc
    except json.JSONDecodeError as exc:
        logging.error(f"Invalid JSON in file {json_path}: {exc}")
        raise json.JSONDecodeError(
            f"Invalid JSON in file {json_path}: {exc}", exc.doc, exc.pos
        ) from exc

    # Compose system message and user content
    system_msg = """# Identity
                    あなたは、入力されたテキストからマンガ／ラノベ／書籍タイトルの「正式名称のみ」を抽出し、不要要素を取り除いて整形するアシスタントです。
                    あなたの目的は、タイトル一覧をクリーンで一貫した形式に統一して出力することです。

                    # Instructions
                    以下のルールに厳密に従って出力してください。

                    1. 入力に含まれる **話数、巻数、出版社名、サブタイトル、記号、エロ・成人タグ、説明文、広告文、シリーズ名以外の情報** をすべて削除してください。
                    2. 抽出対象は **作品タイトルの正式名称のみ** とします。
                    3. **同一作品の表記ゆれ**（全角／半角、記号、サブタイトルの有無、略称の違い）は **一つの正式な表記に統一** してください。
                    4. 出力は **1行につき1タイトル** とします。
                    5. **タイトル以外の情報を推測して追加してはいけません。**
                    6. 原作名とシリーズ名の区別が必要な場合は、**シリーズ名を優先** してください。
                    7. 表記は **日本語のまま、正式名称に統一** してください。
                    8. **コメントや説明文は一切書かず、タイトルのみを出力** してください。
                    9. タイトルに巻数を示す数字（3、ローマ数字、日本語の漢数字など）が含まれている場合はお知らせください。
                       数字が含まれているときは、それが本の巻数を正確に示しているかどうかを判定し、巻数であると判断した場合は数字だけを教えてください（例：3）。

                    # Example1
                    <user_query>
                    ちびっ子転生日記帳～お友達いっは?いつくりましゅ!～ THE COMIC 2 (マッグガーデンコミック Beat'sシリーズ)
                    </user_query>

                    <assistant_response>
                    ちびっ子転生日記帳～お友達いっぱいつくりましゅ!～ THE COMIC
                    2
                    </assistant_response>

                    # Example2

                    <user_query>
                    ミッドナイトレストラン 7to7
                    </user_query>

                    <assistant_response>
                    ミッドナイトレストラン 7to7
                    0
                    </assistant_response>

                    # Example3

                    <user_query>
                    ながたんと青と-いちかの料理帖-
                    </user_query>

                    <assistant_response>
                    ながたんと青と－いちかの料理帖－
                    0
                    </assistant_response>

                    # Example4

                    <user_query>
                    おっさん底辺治癒士と愛娘の辺境ライフ～中年男が回復スキルに覚醒して、英雄へ成り上がる～(コミック) :
                    </user_query>

                    <assistant_response>
                    おっさん底辺治癒士と愛娘の辺境ライフ～中年男が回復スキルに覚醒して、英雄へ成り上がる～
                    0
                    </assistant_response>

                    # Example5

                    <user_query>
                    ハボウの轍 4 ~公安調査庁調査官・土師空也~
                    </user_query>

                    <assistant_response>
                    ハボウの轍～公安調査庁調査官・土師空也～
                    4
                    </assistant_response>

                    # Example6

                    <user_query>
                    バリタチNo.1に負けた俺がネコデビューするまで (DAITO COMICS)
                    </user_query>

                    <assistant_response>
                    バリタチNo.1に負けた俺がネコデビューするまで
                    0
                    </assistant_response>

                    # Example7

                    <user_query>
                    私と結婚した事、後悔していませんか?VI (秋水デジタルコミックス)
                    </user_query>

                    <assistant_response>
                    私と結婚した事、後悔していませんか?
                    4
                    </assistant_response>

                    # Context
                    以下にユーザーが未整理のタイトル一覧を入力します。
                    ルールに従って正式タイトルのみを抽出・整形してください。"""
    edited_data = []
    for item in data:
        user_content = item.get("タイトル")
        color = item.get("color", False)
        new_item = item.copy()
        try:
            if color == True:
                response = client.responses.create(
                    model=model,
                    instructions=system_msg,
                    input=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "input_text",
                                    "text": user_content,
                                }
                            ],
                        }
                    ],
                )
                text = response.output_text
                print("response text", text)
                lines = [line.strip() for line in text.split("\n") if line.strip()]
                title = lines[0]
                explanation = lines[1]
                new_item["タイトル"] = title
                if(explanation != "0" ):
                    new_item["巻数"] = explanation
            else:
                new_item["タイトル"] = user_content
            edited_data.append(new_item)
        except json.JSONDecodeError as exc:
            logging.error(f"Model did not return valid JSON: {exc}")
            raise ValueError(
                f"Model did not return valid JSON: {exc}"
            ) from exc
        except AuthenticationError as exc:
            logging.error("Invalid OpenAI API key. Please check your API key.")
            raise ValueError("Invalid OpenAI API key. Please check your API key.") from exc
        except APIError as exc:
            logging.error(f"OpenAI API error: {exc}")
            raise RuntimeError(f"OpenAI API error: {exc}") from exc

    return edited_data

def input_json_convert_csv(json_data, csv_path: str):
    """Rewrite the CSV at ``csv_path`` with one row per item of ``json_data``.

    Only the first (header) row of the existing file is kept; every other
    existing row is discarded. For each item a row as wide as the header is
    written with ``タイトル`` in column C, ``巻数`` in column G and ``ASIN``
    in column O; all other cells are empty.

    Args:
        json_data: Iterable of dicts (as returned by ``edit_json_with_openai``).
        csv_path: Path of the cp932-encoded CSV file to read and overwrite.

    Returns:
        ``True`` after the file has been written, ``None`` when ``json_data``
        is empty and nothing was done.

    Raises:
        RuntimeError: The CSV could not be read or written, has no header
            row, or its header has fewer than 15 columns.
    """
    import csv  # local import: the module is only needed here

    if not json_data:
        logging.warning("No data provided for CSV conversion.")
        return

    # (zero-based column index, JSON key): C, G and O columns.
    column_map = ((2, "タイトル"), (6, "巻数"), (14, "ASIN"))
    required_width = max(index for index, _ in column_map) + 1

    try:
        with open(
            csv_path, "r", encoding="cp932", errors="ignore", newline=""
        ) as f:
            header = next(csv.reader(f), None)

        if not header:
            raise ValueError(f"CSV file has no header row: {csv_path}")
        if len(header) < required_width:
            raise ValueError(
                f"CSV header has {len(header)} columns; "
                f"at least {required_width} are required"
            )

        output_rows = [header]
        for item in json_data:
            row = [""] * len(header)
            for index, key in column_map:
                row[index] = item.get(key, "")
            output_rows.append(row)

        # errors="replace": titles may contain characters that cp932 cannot
        # encode; without it the write fails after the file has already been
        # truncated, leaving a corrupted CSV behind.
        with open(
            csv_path, "w", encoding="cp932", errors="replace", newline=""
        ) as f:
            csv.writer(f).writerows(output_rows)
        return True
    except Exception as exc:
        logging.error(f"Error converting JSON to CSV: {exc}")
        raise RuntimeError(f"Error converting JSON to CSV: {exc}") from exc

def find_file_by_ext(folder: str, ext: str) -> str | None:
    """Find the first file with the given extension in the folder."""
    files = glob.glob(os.path.join(folder, f"*.{ext}"))
    return files[0] if files else None

# Example usage:
def __main__():
    """Run the full pipeline on the files found in ``./public``.

    Picks one ``.json`` and one ``.csv`` file from ``./public``, sends the
    JSON through ``edit_json_with_openai`` and writes the result into the CSV
    with ``input_json_convert_csv``. Problems are logged rather than raised.
    """
    try:
        json_path = find_file_by_ext("./public", "json")
        csv_path = find_file_by_ext("./public", "csv")
        if not json_path:
            logging.error("No .json file found in ./public")
            return
        if not csv_path:
            logging.error("No .csv file found in ./public")
            return
        edited_data = edit_json_with_openai(json_path)
        convert_info = input_json_convert_csv(edited_data, csv_path)
        print(convert_info)
    except Exception as e:
        # Top-level boundary: log with traceback instead of crashing.
        logging.exception("Error in OpenAI processing: %s", e)


# Without this guard the function above is never called and running the
# script does nothing.
if __name__ == "__main__":
    __main__()