
sidebar.wechat

sidebar.feishu
sidebar.chooseYourWayToJoin

sidebar.scanToAddConsultant
In AI-driven data analysis systems, how to make LLM-generated code execute safely and efficiently is a core challenge. AskTable's ReportAgent uses an innovative architecture: JSX compilation + Python sandbox execution, achieving dynamic generation of data reports.
ReportAgent is responsible for generating data report components. Its core process includes:
load_dataframe() references, and verify that the referenced data sources exist.

class ReportAgent(DBAgent):
def __init__(self, datasource: DataSourceAdmin, assumed_role: RoleAdmin | None = None):
super().__init__(
prompt_name="agent/report_generator",
datasource=datasource,
assumed_role=assumed_role,
)
self.add_tool(self.show_table)
self.add_tool(self.search_metadata)
self.add_tool(self.execute_sql)
self.set_output_parser(self.output_parser)
self.compiled_code: str | None = None
self.source_code: str | None = None
self.referenced_dataframes: list[str] = []
Code generated by LLM needs to be wrapped in <code>...</code> tags:
def output_parser(self, output: str) -> None:
    """Parse the LLM output: extract the <code> block, validate DataFrame
    references against the workspace, then compile the JSX.

    Raises:
        ValueError: if the output lacks a <code>...</code> block, contains no
            load_dataframe('df_id') reference, or references a DataFrame that
            is not present in ``self.data_workspace``.
    """
    # 1. Extract code block wrapped in <code>...</code>
    pattern = r"<code>(.*?)</code>"
    match = re.search(pattern, output, re.DOTALL)
    if not match:
        raise ValueError("Invalid output format. Expected: <code>...</code>")
    code = match.group(1).strip()

    # 2. Extract DataFrame references.
    # Fix: the original pattern had a stray space inside the capture group
    # ("( df_...") which required a literal leading space inside the quotes,
    # so ordinary load_dataframe('df_x') calls never matched the intended id.
    load_df_pattern = r"load_dataframe\(\s*['\"](df_[A-Za-z0-9]+)['\"]\s*\)"
    referenced_dataframes = re.findall(load_df_pattern, code)
    if not referenced_dataframes:
        raise ValueError("No load_dataframe('df_id') pattern found in code.")

    # 3. Verify every referenced DataFrame exists BEFORE compiling.
    # (The original compiled first and set self.compiled_code even when the
    # validation below failed, leaving inconsistent state and wasting a
    # remote compilation call.)
    missing_ids = set(referenced_dataframes) - set(self.data_workspace.keys())
    if missing_ids:
        raise ValueError(f"Referenced dataframes {missing_ids} are not in the data workspace")

    # 4. Compile JSX via the remote Babel service.
    self.compiled_code = compile_jsx(code)
    self.source_code = code
    self.referenced_dataframes = referenced_dataframes
JSX compilation is implemented via a remote service to avoid introducing Node.js dependencies in the Python environment:
BASE_URL = "http://localhost:5300/jsx"


def compile_jsx(code: str) -> str:
    """Send JSX source to the remote Babel service; return the compiled JS.

    Raises Exception with the service-reported error on a non-200 response.
    """
    resp = requests.post(BASE_URL, json={"code": code})
    payload = resp.json()
    if resp.status_code == 200:
        return payload["compiledCode"]
    raise Exception(payload["error"])
The compilation service uses Babel for JSX transpilation:
// Node.js compilation service
const express = require('express');
const babel = require('@babel/core');

// Fix: the original snippet used `app` without creating it, and never
// enabled JSON body parsing, so `req.body` would be undefined (Express 4+).
const app = express();
app.use(express.json());

// POST /jsx — transpile JSX (and ES-module syntax) to CommonJS JavaScript.
app.post('/jsx', (req, res) => {
  const { code } = req.body;
  try {
    const result = babel.transformSync(code, {
      presets: ['@babel/preset-react'],
      plugins: ['@babel/plugin-transform-modules-commonjs']
    });
    res.json({ compiledCode: result.code });
  } catch (error) {
    // Babel syntax errors are returned as 400 so the caller (LLM loop)
    // can surface the message and retry with corrected code.
    res.status(400).json({ error: error.message });
  }
});
<code>
import { BarChart } from '@/components/charts';

// Example report component emitted by the LLM: one bar chart plus a footer total.
function SalesReport() {
  // load_dataframe('df_abc123') resolves a DataFrame from the agent's workspace;
  // this is the reference that output_parser validates before compilation.
  const data = load_dataframe('df_abc123');
  return (
    <div className="report">
      <h2>Monthly Sales Report</h2>
      <BarChart
        data={data}
        xField="month"
        yField="sales"
        title="2024 Sales Trend"
      />
      {/* Sum the `sales` field across all rows for the footer total. */}
      <p>Total Sales: {data.reduce((sum, row) => sum + row.sales, 0)}</p>
    </div>
  );
}
</code>
For data processing and analysis tasks, ReportAgent supports executing Python code:
class CorrAnalyzerAgent(DBAgent):
    """Agent for statistical/correlation analysis via sandboxed Python execution."""

    def __init__(self, datasource, assumed_role=None, preference=None, user_profile=None):
        # NOTE(review): `preference` and `user_profile` are accepted but neither
        # used nor forwarded in this snippet — confirm whether they should be
        # passed through to super().__init__().
        super().__init__(
            prompt_name="agent/analysis_report_generator",
            datasource=datasource,
            assumed_role=assumed_role,
        )
        self.add_tool(self.execute_python)
        # One sandbox per agent instance, reused across calls; the package
        # allowlist is fixed at executor-creation time.
        self.executor = PythonExecutor(packages=["pandas", "numpy", "scipy"])
class PythonExecutor:
    """Client for the remote sandboxed Python execution service."""

    def __init__(self, packages: list[str] = DEFAULT_PACKAGES):
        """Create a remote executor restricted to `packages`.

        Raises:
            Exception: if the service refuses to create the executor.
        """
        self._base_url = BASE_URL
        # Defensive copy: `packages` defaults to the shared module-level
        # DEFAULT_PACKAGES list; aliasing it would let a later mutation of
        # self._packages leak into every other executor's default.
        self._packages = list(packages)
        self._executor_id = self._create_executor(self._packages)

    def _create_executor(self, packages: list[str]) -> str:
        """Create an isolated executor instance and return its id."""
        response = requests.post(
            f"{self._base_url}/python/executor",
            json={"packages": packages},
        )
        # 201 Created is the only success status for this endpoint.
        if response.status_code != 201:
            raise Exception(f"Failed to create executor: {response.text}")
        return response.json()["executor_id"]
Use Pickle + Base64 for DataFrame transmission:
def send_dataframes(self, dataframes: dict[str, pd.DataFrame]) -> None:
    """Ship local DataFrames into the remote executor's `_saved_dataframes` cache.

    Each frame is pickled and base64-encoded, then embedded in a small
    bootstrap script executed remotely, so later user code can load it by name.
    """
    for df_name, df in dataframes.items():
        # Serialize DataFrame (pickle + base64 keeps it JSON-transport safe).
        pickled_df = base64.b64encode(pickle.dumps(df)).decode()
        # Inject into execution environment.
        # NOTE(review): df_name is interpolated unescaped into generated
        # source — a name containing a quote would break (or inject) code;
        # presumably ids always match the df_* pattern — confirm upstream
        # validation.
        code = dedent(f"""
            import pickle
            import base64
            df = pickle.loads(base64.b64decode('{pickled_df}'))
            _saved_dataframes['{df_name}'] = {{
                "df": df,
                "description": "initial dataframe"
            }}
        """)
        self._execute(code=code)
def execute(self, code: str, variables: dict | None = None, dataframes: dict | None = None) -> CodeExecutionResponse:
    """Run user code in the remote executor, optionally injecting variables
    and DataFrames first.

    Args:
        code: Python source to execute remotely.
        variables: optional plain values to expose in the remote namespace.
        dataframes: optional DataFrames to cache remotely (see send_dataframes).

    Returns:
        The CodeExecutionResponse produced by the final code execution.

    Note: defaults changed from mutable ``{}`` to ``None`` (shared-mutable-
    default pitfall); observable behavior for callers is unchanged since both
    are falsy and skip injection.
    """
    # 1. Inject variables.
    if variables:
        pickled_vars = base64.b64encode(pickle.dumps(variables)).decode()
        # Import pickle/base64 in the remote snippet too — send_dataframes
        # does this, but the original variables path did not, so a fresh
        # executor namespace could hit NameError. dedent keeps the injected
        # source unindented, matching the send_dataframes style.
        code_inject = dedent(f"""
            import pickle
            import base64
            vars_dict = pickle.loads(base64.b64decode('{pickled_vars}'))
            locals().update(vars_dict)
        """)
        self._execute(code=code_inject)
    # 2. Inject DataFrames (cached remotely in _saved_dataframes).
    if dataframes:
        self.send_dataframes(dataframes)
    # 3. Execute the user code itself.
    return self._execute(code=code)
def _execute(self, code: str) -> CodeExecutionResponse:
    """POST `code` to this executor instance and deserialize the response.

    SECURITY NOTE(review): pickle.loads is applied to data returned by the
    executor service; pickle can execute arbitrary code on load, so this is
    safe only if the service is fully trusted — confirm it is not reachable
    by tenants.
    """
    response = requests.post(
        f"{self._base_url}/python/executor/{self._executor_id}/code",
        json={"code": code},
    )
    # NOTE(review): no status-code check here (unlike _create_executor); a
    # non-JSON error body would raise from .json() — confirm intended.
    response_json = response.json()
    # Deserialize DataFrame entries; each value is base64(pickle({"df": ...,
    # "description": ...})) as produced by the executor side.
    dataframes = {}
    for df_name, df_data in response_json.get("dataframes", {}).items():
        raw_df = pickle.loads(base64.b64decode(df_data))
        if raw_df:  # skip empty/None entries
            dataframes[df_name] = ResponseDataframe(
                df=raw_df.get("df"),
                description=raw_df.get("description"),
                sql=None,  # no SQL provenance for python-produced frames
            )
    return CodeExecutionResponse(
        stdout=response_json.get("stdout"),
        error=response_json.get("error"),
        executor_id=response_json.get("executor_id"),
        dataframes=dataframes,
    )
# Example: a data-cleaning task executed in the sandbox.
code = """
import pandas as pd
# Load data
df = load_dataframe('df_raw_sales')
# Data cleaning
df_cleaned = df.dropna()
df_cleaned['date'] = pd.to_datetime(df_cleaned['date'])
df_cleaned = df_cleaned[df_cleaned['amount'] > 0]
# Save result
df_id = save_dataframe(df_cleaned, "Cleaned sales data")
print(f"Cleaning complete, {len(df_cleaned)} records processed")
"""
result = executor.execute(code)
# NOTE(review): execute() returns a CodeExecutionResponse constructed with
# keyword arguments; if that type uses attribute access (e.g. a pydantic
# model) this should be `result.stdout` — confirm the type's access style.
print(result["stdout"]) # "Cleaning complete, 1234 records processed"
# Example: descriptive statistics and correlation analysis in the sandbox
# (scipy is available because the executor was created with it allowlisted).
code = """
import pandas as pd
import numpy as np
from scipy import stats
# Load data
df = load_dataframe('df_sales')
# Calculate statistical metrics
mean_sales = df['amount'].mean()
median_sales = df['amount'].median()
std_sales = df['amount'].std()
# Correlation analysis
corr = df[['amount', 'quantity']].corr()
print(f"Mean sales: {mean_sales:.2f}")
print(f"Median: {median_sales:.2f}")
print(f"Standard deviation: {std_sales:.2f}")
print(f"Correlation coefficient:\n{corr}")
"""
result = executor.execute(code)
code = """
import pandas as pd
# Load data
df = load_dataframe('df_orders')
# Aggregate by month
df['month'] = pd.to_datetime(df['date']).dt.to_period('M')
monthly_sales = df.groupby('month').agg({
'amount': 'sum',
'order_id': 'count'
}).reset_index()
monthly_sales.columns = ['Month', 'Sales', 'Order Count']
# Save result
df_id = save_dataframe(monthly_sales, "Monthly sales summary")
"""
result = executor.execute(code)
Each executor instance can only use packages specified at creation:
executor = PythonExecutor(packages=["pandas", "numpy", "scipy"])
# Cannot use dangerous packages like requests, os
Each executor instance runs independently without interference:
executor1 = PythonExecutor(packages=["pandas"])
executor2 = PythonExecutor(packages=["numpy"])
# executor1 and executor2 are completely isolated
Executor server sets timeout limits to prevent infinite loops:
# Server configuration
EXECUTION_TIMEOUT = 30 # 30 second timeout
Limit memory and CPU usage:
# Docker container resource limits
docker run --memory="512m" --cpus="1.0" python-executor
Reuse executor instances within the same session to avoid repeated creation:
class CorrAnalyzerAgent:
    def __init__(self):
        # Create once, use many times
        # (reusing one executor avoids the per-request cost of creating a
        # fresh sandbox and re-sending cached DataFrames).
        self.executor = PythonExecutor(packages=["pandas", "numpy", "scipy"])
Executor internally caches DataFrames to avoid repeated transmission:
# First transmission
executor.send_dataframes({"df_1": df})
# Subsequent code can directly use it
code = "df = load_dataframe('df_1')"
Combine multiple small operations into one execution:
code = """
df1 = load_dataframe('df_1')
df2 = load_dataframe('df_2')
result = pd.merge(df1, df2, on='id')
save_dataframe(result, "Merge result")
"""
AskTable's JSX compilation and Python sandbox execution system achieves safe and efficient code execution through the following technologies:
This architecture not only ensures security but also provides a reliable execution environment for LLM-generated code. It is key infrastructure for AI-driven data analysis systems.
sidebar.noProgrammingNeeded
sidebar.startFreeTrial