From 96798cb1b6c03079131940f41fe7b77b0636aac7 Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Thu, 27 Jul 2023 13:15:51 +0200 Subject: [PATCH 1/7] create individual workdir for kernel and initialize a dedicated virtual env TODO: file up- and download does not work yet due to the 'workspace' dir not being mapped correctly --- .gitignore | 1 + frontend/src/App.tsx | 3 +- frontend/src/components/Chat.tsx | 1 + gpt_code_ui/kernel_program/kernel_manager.py | 84 +++++++++++++------- 4 files changed, 60 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index a1a5b8a0..28c19937 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # Program related process_pids/ +kernel.* kernel_connection_file.json # Python stuff diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 3212d273..9a5e5c95 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -53,7 +53,7 @@ function App() { ]) ); let [waitingForSystem, setWaitingForSystem] = useState( - WaitingStates.Idle + WaitingStates.StartingKernel ); const chatScrollRef = React.useRef(null); @@ -78,6 +78,7 @@ function App() { const handleCommand = (command: string) => { if (command == "reset") { addMessage({ text: "Restarting the kernel.", type: "message", role: "system" }); + setWaitingForSystem(WaitingStates.StartingKernel); fetch(`${Config.API_ADDRESS}/restart`, { method: "POST", diff --git a/frontend/src/components/Chat.tsx b/frontend/src/components/Chat.tsx index 4fbedb1e..56d2b830 100644 --- a/frontend/src/components/Chat.tsx +++ b/frontend/src/components/Chat.tsx @@ -94,6 +94,7 @@ function Message(props: { export enum WaitingStates { + StartingKernel = "Starting Kernel", GeneratingCode = "Generating code", RunningCode = "Running code", UploadingFile = "Uploading file", diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index 93f6d799..417c403f 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -1,6 +1,8 @@ import sys import subprocess import os +import shutil +import atexit import queue import json import signal @@ -167,30 +169,53 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2): logger.debug(f"{e} [{type(e)}") -def start_kernel(): - kernel_connection_file = os.path.join(os.getcwd(), "kernel_connection_file.json") - - if os.path.isfile(kernel_connection_file): - os.remove(kernel_connection_file) - if os.path.isdir(kernel_connection_file): - os.rmdir(kernel_connection_file) - - launch_kernel_script_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), "launch_kernel.py" - ) - - os.makedirs('workspace/', exist_ok=True) - +def start_kernel(id: str): + cwd = pathlib.Path(os.getcwd()) + kernel_dir = cwd / f'kernel.{id}' + kernel_venv = kernel_dir / 'venv' + kernel_venv_bindir = kernel_venv / 'bin' + kernel_python_executable = kernel_venv_bindir / os.path.basename(sys.executable) + kernel_connection_file = kernel_dir / "kernel_connection_file.json" + launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py" + kernel_env = os.environ.copy() + kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH'] + + # Cleanup potential leftovers + shutil.rmtree(kernel_dir, ignore_errors=True) + + os.makedirs(kernel_dir) + + # create virtual env inside kernel_venv directory + subprocess.run([sys.executable, '-m', 'venv', kernel_venv, '--upgrade-deps', '--system-site-packages']) + # install wheel because some packages do not like being installed without + subprocess.run([kernel_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0']) + # install all default packages into the venv + default_packages = [ + "ipykernel>=6,<7", + "numpy>=1.24,<1.25", + "dateparser>=1.1,<1.2", + "pandas>=1.5,<1.6", + "geopandas>=0.13,<0.14", + "tabulate>=0.9.0<1.0", + "PyPDF2>=3.0,<3.1", + "pdfminer>=20191125,<20191200", + "pdfplumber>=0.9,<0.10", + "matplotlib>=3.7,<3.8", + ] + subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages) + + # start the kernel using the virtual env python executable kernel_process = subprocess.Popen( [ - sys.executable, + kernel_python_executable, launch_kernel_script_path, "--IPKernelApp.connection_file", kernel_connection_file, "--matplotlib=inline", "--quiet", ], - cwd='workspace/' + cwd=kernel_dir, + env=kernel_env, ) # Write PID for caller to kill str_kernel_pid = str(kernel_process.pid) @@ -200,25 +225,28 @@ def start_kernel(): # Wait for kernel connection file to be written while True: - if not os.path.isfile(kernel_connection_file): + try: + with open(kernel_connection_file, 'r') as fp: + json.load(fp) + except (FileNotFoundError, json.JSONDecodeError): + # Either file was not yet there or incomplete (then JSON parsing failed) sleep(0.1) + pass else: - # Keep looping if JSON parsing fails, file may be partially written - try: - with open(kernel_connection_file, 'r') as fp: - json.load(fp) - break - except json.JSONDecodeError: - pass + break # Client - kc = BlockingKernelClient(connection_file=kernel_connection_file) + kc = BlockingKernelClient(connection_file=str(kernel_connection_file)) kc.load_connection_file() kc.start_channels() kc.wait_for_ready() - return kc + return kc, kernel_dir if __name__ == "__main__": - kc = start_kernel() - start_snakemq(kc) \ No newline at end of file + kc, kernel_dir = start_kernel(id='ffa907c8-1908-49e3-974b-c0c27cbac6e4') # This is just a random but fixed ID for now - to be prepared for later multi-kernel scenarios + + # make sure the dir with the virtualenv will be deleted after kernel termination + atexit.register(lambda: shutil.rmtree(kernel_dir, ignore_errors=True)) + + start_snakemq(kc) From e6b0b517cdb8b9b211670325a27c77bfce739a09 Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Thu, 27 Jul 2023 14:38:00 +0200 Subject: [PATCH 2/7] fix up-/download by using a fixed workspace name. this is fine as long as we do not implement multi-user-support. For this, things are prepared, though. --- gpt_code_ui/kernel_program/kernel_manager.py | 15 ++++++++++----- gpt_code_ui/kernel_program/main.py | 9 ++++++--- gpt_code_ui/webapp/main.py | 5 +++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index 417c403f..2788e375 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -9,7 +9,6 @@ import pathlib import threading import time -import atexit import traceback from time import sleep @@ -201,6 +200,7 @@ def start_kernel(id: str): "pdfminer>=20191125,<20191200", "pdfplumber>=0.9,<0.10", "matplotlib>=3.7,<3.8", + "openpyxl>=3.1.2,<4", ] subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages) @@ -244,9 +244,14 @@ def start_kernel(id: str): if __name__ == "__main__": - kc, kernel_dir = start_kernel(id='ffa907c8-1908-49e3-974b-c0c27cbac6e4') # This is just a random but fixed ID for now - to be prepared for later multi-kernel scenarios + try: + kernel_id = sys.argv[1] + except IndexError as e: + logger.exception('Missing kernel ID command line parameter', e) + else: + kc, kernel_dir = start_kernel(id=kernel_id) - # make sure the dir with the virtualenv will be deleted after kernel termination - atexit.register(lambda: shutil.rmtree(kernel_dir, ignore_errors=True)) + # make sure the dir with the virtualenv will be deleted after kernel termination + atexit.register(lambda: shutil.rmtree(kernel_dir, ignore_errors=True)) - start_snakemq(kc) + start_snakemq(kc) diff --git a/gpt_code_ui/kernel_program/main.py b/gpt_code_ui/kernel_program/main.py index 401133ee..5e02fc52 100644 --- a/gpt_code_ui/kernel_program/main.py +++ b/gpt_code_ui/kernel_program/main.py @@ -47,15 +47,18 @@ app = Flask(__name__) CORS(app) + def start_kernel_manager(): global kernel_manager_process kernel_manager_script_path = os.path.join( pathlib.Path(__file__).parent.resolve(), "kernel_manager.py" ) - kernel_manager_process = subprocess.Popen( - [sys.executable, kernel_manager_script_path] - ) + kernel_manager_process = subprocess.Popen([ + sys.executable, + kernel_manager_script_path, + 'workspace', # This will be used as part of the folder name for the workspace and to create the venv inside. Can be anything, but using 'workspace' makes file up-/download very simple + ]) # Write PID as .pid to config.KERNEL_PID_DIR os.makedirs(config.KERNEL_PID_DIR, exist_ok=True) diff --git a/gpt_code_ui/webapp/main.py b/gpt_code_ui/webapp/main.py index 332986e5..6930731b 100644 --- a/gpt_code_ui/webapp/main.py +++ b/gpt_code_ui/webapp/main.py @@ -33,7 +33,7 @@ else: raise ValueError(f'Invalid OPENAI_API_TYPE: {openai.api_type}') -UPLOAD_FOLDER = 'workspace/' +UPLOAD_FOLDER = 'kernel.workspace/' os.makedirs(UPLOAD_FOLDER, exist_ok=True) @@ -119,6 +119,7 @@ async def get_code(user_prompt, user_openai_key=None, model="gpt-3.5-turbo"): For data visualization, you can use 'matplotlib', # matplotlib==3.7.1 Be sure to generate charts with matplotlib. If you need geographical charts, use geopandas with the geopandas.datasets module. + If an additional package is required, you can add the corresponding "!pip install PACKAGE" call to the beginning of the code. If the user has just uploaded a file, focus on the file that was most recently uploaded (and optionally all previously uploaded files) Teacher mode: if the code modifies or produces a file, at the end of the code block insert a print statement that prints a link to it as HTML string: Download file. Replace INSERT_FILENAME_HERE with the actual filename.""" @@ -230,7 +231,7 @@ def download_file(): file = request.args.get('file') # from `workspace/` send the file # make sure to set required headers to make it download the file - return send_from_directory(os.path.join(os.getcwd(), 'workspace'), file, as_attachment=True) + return send_from_directory(os.path.join(os.getcwd(), 'kernel.workspace'), file, as_attachment=True) @app.route('/inject-context', methods=['POST']) From 565a0f0d26efc1d4162ec556994c1f90e337bc37 Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Thu, 27 Jul 2023 15:08:04 +0200 Subject: [PATCH 3/7] some PEP8 and code deduplication --- gpt_code_ui/kernel_program/config.py | 2 +- gpt_code_ui/kernel_program/kernel_manager.py | 9 +++---- gpt_code_ui/kernel_program/launch_kernel.py | 2 +- gpt_code_ui/kernel_program/main.py | 25 +++++++++----------- gpt_code_ui/kernel_program/utils.py | 14 ++++++++++- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/gpt_code_ui/kernel_program/config.py b/gpt_code_ui/kernel_program/config.py index aa37c08f..b4a4a6df 100644 --- a/gpt_code_ui/kernel_program/config.py +++ b/gpt_code_ui/kernel_program/config.py @@ -15,4 +15,4 @@ def get_logger(): logger = logging.getLogger(__name__) if "DEBUG" in os.environ: logger.setLevel(logging.DEBUG) - return logger \ No newline at end of file + return logger diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index 2788e375..cf87deca 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -57,7 +57,7 @@ def cleanup_spawned_processes(): os.kill(pid, signal.CTRL_BREAK_EVENT) else: os.kill(pid, signal.SIGKILL) - + # After successful kill, cleanup pid file os.remove(fp) @@ -217,11 +217,8 @@ def start_kernel(id: str): cwd=kernel_dir, env=kernel_env, ) - # Write PID for caller to kill - str_kernel_pid = str(kernel_process.pid) - os.makedirs(config.KERNEL_PID_DIR, exist_ok=True) - with open(os.path.join(config.KERNEL_PID_DIR, str_kernel_pid + ".pid"), "w") as p: - p.write("kernel") + + utils.store_pid(kernel_process.pid, "kernel") # Wait for kernel connection file to be written while True: diff --git a/gpt_code_ui/kernel_program/launch_kernel.py b/gpt_code_ui/kernel_program/launch_kernel.py index d193051d..f66b36c8 100644 --- a/gpt_code_ui/kernel_program/launch_kernel.py +++ b/gpt_code_ui/kernel_program/launch_kernel.py @@ -1,4 +1,4 @@ if __name__ == "__main__": from ipykernel import kernelapp as app - app.launch_new_instance() \ No newline at end of file + app.launch_new_instance() diff --git a/gpt_code_ui/kernel_program/main.py b/gpt_code_ui/kernel_program/main.py index 5e02fc52..22c5addf 100644 --- a/gpt_code_ui/kernel_program/main.py +++ b/gpt_code_ui/kernel_program/main.py @@ -7,7 +7,6 @@ import time import asyncio -import json import threading from queue import Queue @@ -60,14 +59,13 @@ def start_kernel_manager(): 'workspace', # This will be used as part of the folder name for the workspace and to create the venv inside. Can be anything, but using 'workspace' makes file up-/download very simple ]) - # Write PID as .pid to config.KERNEL_PID_DIR - os.makedirs(config.KERNEL_PID_DIR, exist_ok=True) - with open(os.path.join(config.KERNEL_PID_DIR, "%d.pid" % kernel_manager_process.pid), "w") as p: - p.write("kernel_manager") + utils.store_pid(kernel_manager_process.pid, "kernel_manager") + def cleanup_kernel_program(): kernel_manager.cleanup_spawned_processes() + async def start_snakemq(): global messaging @@ -80,7 +78,7 @@ def on_recv(conn, ident, message): if message["value"] == "ready": logger.debug("Kernel is ready.") result_queue.put({ - "value":"Kernel is ready.", + "value": "Kernel is ready.", "type": "message" }) @@ -100,8 +98,9 @@ def send_queued_messages(): while True: if send_queue.qsize() > 0: message = send_queue.get() - utils.send_json(messaging, - {"type": "execute", "value": message["command"]}, + utils.send_json( + messaging, + {"type": "execute", "value": message["command"]}, config.IDENT_KERNEL_MANAGER ) time.sleep(0.1) @@ -120,7 +119,7 @@ async def async_link_loop(): @app.route("/api", methods=["POST", "GET"]) def handle_request(): - + if request.method == "GET": # Handle GET requests by sending everything that's in the receive_queue results = [result_queue.get() for _ in range(result_queue.qsize())] @@ -131,7 +130,8 @@ def handle_request(): send_queue.put(data) return jsonify({"result": "success"}) - + + @app.route("/restart", methods=["POST"]) def handle_restart(): @@ -155,9 +155,6 @@ async def main(): def run_flask_app(): app.run(host="0.0.0.0", port=APP_PORT) + if __name__ == "__main__": asyncio.run(main()) - - - - \ No newline at end of file diff --git a/gpt_code_ui/kernel_program/utils.py b/gpt_code_ui/kernel_program/utils.py index cce7d704..c7f4441d 100644 --- a/gpt_code_ui/kernel_program/utils.py +++ b/gpt_code_ui/kernel_program/utils.py @@ -1,3 +1,4 @@ +import os import re import json import snakemq.link @@ -7,6 +8,7 @@ import gpt_code_ui.kernel_program.config as config + def escape_ansi(line): ansi_escape = re.compile(r"(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]") return ansi_escape.sub("", line) @@ -16,6 +18,7 @@ def send_json(messaging, message, identity): message = snakemq.message.Message(json.dumps(message).encode("utf-8"), ttl=600) messaging.send_message(identity, message) + def init_snakemq(ident, init_type="listen"): link = snakemq.link.Link() packeter = snakemq.packeter.Packeter(link) @@ -26,4 +29,13 @@ def init_snakemq(ident, init_type="listen"): link.add_connector(("localhost", config.SNAKEMQ_PORT)) else: raise Exception("Unsupported init type.") - return messaging, link \ No newline at end of file + return messaging, link + + +def store_pid(pid: int, process_name: str): + ''' + Write PID as .pid to config.KERNEL_PID_DIR + ''' + os.makedirs(config.KERNEL_PID_DIR, exist_ok=True) + with open(os.path.join(config.KERNEL_PID_DIR, f"{pid}.pid"), "w") as p: + p.write(process_name) From 00619958076825e1f02be51b3f90d49dc4f8ff13 Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Mon, 31 Jul 2023 16:18:31 +0200 Subject: [PATCH 4/7] make creation of new env optional --- gpt_code_ui/kernel_program/kernel_manager.py | 60 +++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index cf87deca..92ce3bc3 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -20,6 +20,8 @@ import gpt_code_ui.kernel_program.utils as utils import gpt_code_ui.kernel_program.config as config +USE_SEPARATE_VENV = False + # Set up globals messaging = None logger = config.get_logger() @@ -171,38 +173,42 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2): def start_kernel(id: str): cwd = pathlib.Path(os.getcwd()) kernel_dir = cwd / f'kernel.{id}' - kernel_venv = kernel_dir / 'venv' - kernel_venv_bindir = kernel_venv / 'bin' - kernel_python_executable = kernel_venv_bindir / os.path.basename(sys.executable) - kernel_connection_file = kernel_dir / "kernel_connection_file.json" - launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py" - kernel_env = os.environ.copy() - kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH'] # Cleanup potential leftovers shutil.rmtree(kernel_dir, ignore_errors=True) - os.makedirs(kernel_dir) - # create virtual env inside kernel_venv directory - subprocess.run([sys.executable, '-m', 'venv', kernel_venv, '--upgrade-deps', '--system-site-packages']) - # install wheel because some packages do not like being installed without - subprocess.run([kernel_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0']) - # install all default packages into the venv - default_packages = [ - "ipykernel>=6,<7", - "numpy>=1.24,<1.25", - "dateparser>=1.1,<1.2", - "pandas>=1.5,<1.6", - "geopandas>=0.13,<0.14", - "tabulate>=0.9.0<1.0", - "PyPDF2>=3.0,<3.1", - "pdfminer>=20191125,<20191200", - "pdfplumber>=0.9,<0.10", - "matplotlib>=3.7,<3.8", - "openpyxl>=3.1.2,<4", - ] - subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages) + kernel_env = os.environ.copy() + kernel_connection_file = kernel_dir / "kernel_connection_file.json" + launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py" + + if not USE_SEPARATE_VENV: + kernel_python_executable = sys.executable # TODO: here we could also pick up an already existing env by simply selecting the corresponding Python binary + else: + kernel_venv = kernel_dir / 'venv' + kernel_venv_bindir = kernel_venv / 'bin' + kernel_python_executable = kernel_venv_bindir / os.path.basename(sys.executable) + kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH'] + + # create virtual env inside kernel_venv directory + subprocess.run([sys.executable, '-m', 'venv', kernel_venv, '--upgrade-deps', '--system-site-packages']) + # install wheel because some packages do not like being installed without + subprocess.run([kernel_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0']) + # install all default packages into the venv + default_packages = [ + "ipykernel>=6,<7", + "numpy>=1.24,<1.25", + "dateparser>=1.1,<1.2", + "pandas>=1.5,<1.6", + "geopandas>=0.13,<0.14", + "tabulate>=0.9.0<1.0", + "PyPDF2>=3.0,<3.1", + "pdfminer>=20191125,<20191200", + "pdfplumber>=0.9,<0.10", + "matplotlib>=3.7,<3.8", + "openpyxl>=3.1.2,<4", + ] + subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages) # start the kernel using the virtual env python executable kernel_process = subprocess.Popen( From e3ef6e1f8ca7e2b9106e4b08333fb4c621fa7bf5 Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Mon, 31 Jul 2023 17:31:03 +0200 Subject: [PATCH 5/7] make virtual env creation much faster We first create a base venv that is not to be deleted and can be re-used easily. There we install all default packages. Then, we create another, kernel-specific, venv without any additional packages. Instead, we create a link that points to the packages installed in the base image. The base image is intended to stay immutable. All newly installed packages end up in the kernel-specific venv. While they are lost on restart, the packages inside the base image are not. --- gpt_code_ui/kernel_program/kernel_manager.py | 82 +++++++++++++------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index 92ce3bc3..98f1c67b 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -10,6 +10,7 @@ import threading import time import traceback +import venv from time import sleep from jupyter_client import BlockingKernelClient @@ -20,8 +21,6 @@ import gpt_code_ui.kernel_program.utils as utils import gpt_code_ui.kernel_program.config as config -USE_SEPARATE_VENV = False - # Set up globals messaging = None logger = config.get_logger() @@ -170,9 +169,58 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2): logger.debug(f"{e} [{type(e)}") +def create_venv(venv_dir: pathlib.Path, install_default_packages: bool) -> pathlib.Path: + venv_bindir = venv_dir / 'bin' + venv_python_executable = venv_bindir / os.path.basename(sys.executable) + + if not os.path.isdir(venv_dir): + # create virtual env inside venv_dir directory + venv.create(venv_dir, system_site_packages=True, with_pip=True, upgrade_deps=True) + + if install_default_packages: + # install wheel because some packages do not like being installed without + subprocess.run([venv_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0']) + # install all default packages into the venv + default_packages = [ + "ipykernel>=6,<7", + "numpy>=1.24,<1.25", + "dateparser>=1.1,<1.2", + "pandas>=1.5,<1.6", + "geopandas>=0.13,<0.14", + "tabulate>=0.9.0<1.0", + "PyPDF2>=3.0,<3.1", + "pdfminer>=20191125,<20191200", + "pdfplumber>=0.9,<0.10", + "matplotlib>=3.7,<3.8", + "openpyxl>=3.1.2,<4", + ] + subprocess.run([venv_python_executable, '-m', 'pip', 'install'] + default_packages) + + # get base env library path as we need this to refer to this form a derived venv + site_packages = subprocess.check_output([venv_python_executable, '-c', 'import sysconfig; print(sysconfig.get_paths()["purelib"])']) + site_packages = site_packages.decode('utf-8').split('\n')[0] + + return pathlib.Path(site_packages) + + +def create_derived_venv(base_venv: pathlib.Path, venv_dir: pathlib.Path): + site_packages_base = create_venv(base_venv, install_default_packages=True) + site_packages_derived = create_venv(venv_dir, install_default_packages=False) + + # create a link from derived venv into the base venv, see https://stackoverflow.com/a/75545634 + with open(site_packages_derived / '_base_packages.pth', 'w') as pth: + pth.write(f'{site_packages_base}\n') + + venv_bindir = venv_dir / 'bin' + venv_python_executable = venv_bindir / os.path.basename(sys.executable) + + return venv_bindir, venv_python_executable + + def start_kernel(id: str): cwd = pathlib.Path(os.getcwd()) kernel_dir = cwd / f'kernel.{id}' + base_dir = cwd / 'kernel.base' # Cleanup potential leftovers shutil.rmtree(kernel_dir, ignore_errors=True) @@ -182,33 +230,9 @@ def start_kernel(id: str): kernel_connection_file = kernel_dir / "kernel_connection_file.json" launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py" - if not USE_SEPARATE_VENV: - kernel_python_executable = sys.executable # TODO: here we could also pick up an already existing env by simply selecting the corresponding Python binary - else: - kernel_venv = kernel_dir / 'venv' - kernel_venv_bindir = kernel_venv / 'bin' - kernel_python_executable = kernel_venv_bindir / os.path.basename(sys.executable) - kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH'] - - # create virtual env inside kernel_venv directory - subprocess.run([sys.executable, '-m', 'venv', kernel_venv, '--upgrade-deps', '--system-site-packages']) - # install wheel because some packages do not like being installed without - subprocess.run([kernel_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0']) - # install all default packages into the venv - default_packages = [ - "ipykernel>=6,<7", - "numpy>=1.24,<1.25", - "dateparser>=1.1,<1.2", - "pandas>=1.5,<1.6", - "geopandas>=0.13,<0.14", - "tabulate>=0.9.0<1.0", - "PyPDF2>=3.0,<3.1", - "pdfminer>=20191125,<20191200", - "pdfplumber>=0.9,<0.10", - "matplotlib>=3.7,<3.8", - "openpyxl>=3.1.2,<4", - ] - subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages) + kernel_venv_dir = kernel_dir / 'venv' + kernel_venv_bindir, kernel_python_executable = create_derived_venv(base_dir, kernel_venv_dir) + kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH'] # start the kernel using the virtual env python executable kernel_process = subprocess.Popen( From 033f918dd89dd157f1df4334335a37c0e086c29b Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Tue, 1 Aug 2023 16:36:09 +0200 Subject: [PATCH 6/7] Make sure, the !pip install PACKAGE commands are included as needed --- gpt_code_ui/webapp/main.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gpt_code_ui/webapp/main.py b/gpt_code_ui/webapp/main.py index 6930731b..4b201c18 100644 --- a/gpt_code_ui/webapp/main.py +++ b/gpt_code_ui/webapp/main.py @@ -94,24 +94,25 @@ def inspect_file(filename: str) -> str: async def get_code(user_prompt, user_openai_key=None, model="gpt-3.5-turbo"): - prompt = f"""First, here is a history of what I asked you to do earlier. - The actual prompt follows after ENDOFHISTORY. + prompt = f"""First, here is a history of what I asked you to do earlier. + The actual prompt follows after ENDOFHISTORY. History: {message_buffer.get_string()} ENDOFHISTORY. Write Python code, in a triple backtick Markdown code block, that does the following: {user_prompt} - - Notes: + + Notes: First, think step by step what you want to do and write it down in English. - Then generate valid Python code in a code block - Make sure all code is valid - it be run in a Jupyter Python 3 kernel environment. + Then generate valid Python code in a single code block. + Make sure all code is valid - it will be run in a Jupyter Python 3 kernel environment. Define every variable before you use it. - For data munging, you can use + For data processing, you can use 'numpy', # numpy==1.24.3 'dateparser' #dateparser==1.1.8 'pandas', # matplotlib==1.5.3 'geopandas' # geopandas==0.13.2 + 'tabulate' # tabulate==0.9.0 For pdf extraction, you can use 'PyPDF2', # PyPDF2==3.0.1 'pdfminer', # pdfminer==20191125 @@ -120,9 +121,10 @@ async def get_code(user_prompt, user_openai_key=None, model="gpt-3.5-turbo"): 'matplotlib', # matplotlib==3.7.1 Be sure to generate charts with matplotlib. If you need geographical charts, use geopandas with the geopandas.datasets module. If an additional package is required, you can add the corresponding "!pip install PACKAGE" call to the beginning of the code. + If the user requests to generate a table, produce code that prints a markdown table. If the user has just uploaded a file, focus on the file that was most recently uploaded (and optionally all previously uploaded files) - - Teacher mode: if the code modifies or produces a file, at the end of the code block insert a print statement that prints a link to it as HTML string: Download file. Replace INSERT_FILENAME_HERE with the actual filename.""" + If the code modifies or produces a file, at the end of the code block insert a print statement that prints a link to it as HTML string: Download file. Replace INSERT_FILENAME_HERE with the actual filename. + Do not use your own knowledge to answer the user prompt. Instead, focus on generating Python code for doing so.""" if user_openai_key: openai.api_key = user_openai_key From 05371d5341825d091a35771c8ddd2345b59749bf Mon Sep 17 00:00:00 2001 From: Mathias Winkel Date: Tue, 1 Aug 2023 16:55:30 +0200 Subject: [PATCH 7/7] support image output generated with Draw.MolToImage(caffeine_molecule) from rdkit as well (should work for all PIL.Image content, I think) --- gpt_code_ui/kernel_program/kernel_manager.py | 44 ++++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/gpt_code_ui/kernel_program/kernel_manager.py b/gpt_code_ui/kernel_program/kernel_manager.py index 98f1c67b..8a0f7ef0 100644 --- a/gpt_code_ui/kernel_program/kernel_manager.py +++ b/gpt_code_ui/kernel_program/kernel_manager.py @@ -130,29 +130,27 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2): while True: try: msg = kc.get_iopub_msg(timeout=timeout) - if msg["msg_type"] == "execute_result": - if "text/plain" in msg["content"]["data"]: - send_message( - msg["content"]["data"]["text/plain"], "message_raw" - ) - if msg["msg_type"] == "display_data": - if "image/png" in msg["content"]["data"]: - # Convert to Slack upload - send_message( - msg["content"]["data"]["image/png"], - message_type="image/png", - ) - elif "text/plain" in msg["content"]["data"]: - send_message(msg["content"]["data"]["text/plain"]) - - elif msg["msg_type"] == "stream": - logger.debug("Received stream output %s" % msg["content"]["text"]) - send_message(msg["content"]["text"]) - elif msg["msg_type"] == "error": - send_message( - utils.escape_ansi("\n".join(msg["content"]["traceback"])), - "message_raw", - ) + msg_type = msg["msg_type"] + msg_content = msg["content"] + + logger.debug(f'Received "{msg_type}" output: {msg_content}') + + if msg_type in ("execute_result", "display_data"): + content_data = msg_content["data"] + + if "image/png" in content_data: + send_message(content_data["image/png"], message_type="image/png") + elif "image/jpeg" in content_data: + send_message(content_data["image/jpeg"], message_type="image/jpeg") + elif "text/plain" in content_data: + send_message(content_data["text/plain"], "message_raw" if msg_type == "execute_result" else "message") + + elif msg_type == "stream": + send_message(msg_content["text"]) + + elif msg_type == "error": + send_message(utils.escape_ansi("\n".join(msg["content"]["traceback"])), "message_error") + except queue.Empty: hit_empty += 1 if hit_empty == tries: