Skip to content

Commit d5014bc

Browse files
committed
perf(startup): defer heavy imports in 0002_data.py
The IPython startup script imports pandas, matplotlib.pyplot, e2b_charts, and orjson at module load time. Every Python kernel pays that cost in resident memory, regardless of whether the customer ever displays a DataFrame, Figure, or large JSON object — and that resident heap is captured in the snapshot at every pause.

Move the heavy imports into the formatter __call__ bodies so they are only resolved when a matching object is actually rendered. Top-level imports are limited to IPython + traitlets, which is what the formatter class definitions actually need.

IPython invokes every registered formatter for every displayed object, so a naive "import pandas" at the top of __call__ would shift the cost from kernel boot to the first cell with any output. Instead, gate on sys.modules.get("pandas"): a pandas.DataFrame cannot exist unless the user has already imported pandas. The same sentinel works for the matplotlib + e2b_charts dependency chain in E2BChartFormatter. Sessions that never touch pandas/matplotlib now never load them.

E2BDataFormatter previously declared type_printers={pandas.DataFrame: …}, which forced pandas at class-definition time. The new version dispatches via isinstance() inside __call__ — slightly more work per formatted object, but the heavy dependencies stay unloaded until the user genuinely needs them.

Signed-off-by: Nikita Kalyazin <nikita.kalyazin@e2b.dev>
1 parent b355e7c commit d5014bc

1 file changed

Lines changed: 35 additions & 32 deletions

File tree

template/startup_scripts/0002_data.py

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,33 @@
1-
import pandas
2-
from matplotlib.pyplot import Figure
1+
import sys
2+
33
import IPython
44
from IPython.core.formatters import BaseFormatter, JSONFormatter
55
from traitlets.traitlets import Unicode, ObjectName
66

7-
from e2b_charts import chart_figure_to_dict
8-
import orjson
9-
10-
11-
def _figure_repr_e2b_chart_(self: Figure):
12-
"""
13-
This method is used to extract data from the figure object to a dictionary
14-
"""
15-
# Get all Axes objects from the Figure
16-
try:
17-
return chart_figure_to_dict(self)
18-
except: # noqa: E722
19-
return {}
20-
21-
22-
def _dataframe_repr_e2b_data_(self: pandas.DataFrame):
23-
result = self.to_dict(orient="list")
24-
for key, value in result.items():
25-
# Check each column's values
26-
result[key] = [
27-
v.isoformat() if isinstance(v, pandas.Timestamp) else v for v in value
28-
]
29-
return result
30-
317

328
class E2BDataFormatter(BaseFormatter):
339
format_type = Unicode("e2b/data")
3410

3511
print_method = ObjectName("_repr_e2b_data_")
3612
_return_type = (dict, str)
3713

38-
type_printers = {pandas.DataFrame: _dataframe_repr_e2b_data_}
14+
def __call__(self, obj):
15+
# IPython invokes every registered formatter for every displayed
16+
# object. Gate on sys.modules so a non-DataFrame output (e.g. an
17+
# int from `1 + 1`) doesn't pay the pandas import cost — a
18+
# pandas.DataFrame can only exist if the user already imported
19+
# pandas.
20+
pandas = sys.modules.get("pandas")
21+
if pandas is None or not isinstance(obj, pandas.DataFrame):
22+
return super().__call__(obj)
23+
24+
result = obj.to_dict(orient="list")
25+
for key, value in result.items():
26+
# Check each column's values
27+
result[key] = [
28+
v.isoformat() if isinstance(v, pandas.Timestamp) else v for v in value
29+
]
30+
return result
3931

4032

4133
class E2BChartFormatter(BaseFormatter):
@@ -45,19 +37,30 @@ class E2BChartFormatter(BaseFormatter):
4537
_return_type = (dict, str)
4638

4739
def __call__(self, obj):
48-
# Figure object is for some reason removed on execution of the cell,
49-
# so it can't be used in type_printers or with top-level import
40+
# Same sys.modules gate as E2BDataFormatter: a matplotlib Figure
41+
# can only exist if the user already imported matplotlib.
42+
if sys.modules.get("matplotlib") is None:
43+
return super().__call__(obj)
44+
5045
from matplotlib.pyplot import Figure
5146

52-
if isinstance(obj, Figure):
53-
return _figure_repr_e2b_chart_(obj)
54-
return super().__call__(obj)
47+
if not isinstance(obj, Figure):
48+
return super().__call__(obj)
49+
50+
from e2b_charts import chart_figure_to_dict
51+
52+
try:
53+
return chart_figure_to_dict(obj)
54+
except: # noqa: E722
55+
return {}
5556

5657

5758
class E2BJSONFormatter(JSONFormatter):
5859
def __call__(self, obj):
5960
if isinstance(obj, (list, dict)):
6061
try:
62+
import orjson
63+
6164
return orjson.loads(
6265
orjson.dumps(
6366
obj, option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS

0 commit comments

Comments
 (0)