feat: layered precise generation for A4 report images

Adds a 3-phase pipeline that avoids LLM prompt overflow caused by too many OCR elements:
Phase 1 (generate_skeleton): compressed layout schema → skeleton JRXML
Phase 2 (refine_layout): sampled coordinates → pixel-level position tuning
Phase 3 (map_fields): OCR field names → replace $F{field_N} placeholders

The pipeline is triggered only for the initial_generation intent when layout_schema.total_rows > 0.
Text requests and all other intents are unaffected (zero behavior change).
This commit is contained in:
2026-05-21 08:34:32 +08:00
parent 9bb011e429
commit 43a0542a11
14 changed files with 882 additions and 81 deletions
+10 -1
View File
@@ -80,6 +80,9 @@ NODE_LABELS = {
"handle_undo": "↩ 撤销操作",
"handle_reset": "🔄 重置会话",
"save_session": "💾 保存会话",
"generate_skeleton": "🏗 生成骨架",
"refine_layout": "📐 精调布局",
"map_fields": "🏷 映射字段",
}
INTENT_LABELS = {
@@ -133,6 +136,11 @@ def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
if tt == "full_a4":
parsed_text = layout["description"]
parsed_type = "a4_template"
# 存储布局 schema 供分层精确生成使用
from backend.layout_analyzer import extract_layout_schema
schema = extract_layout_schema(layout)
st.session_state.agent_state["layout_schema"] = schema
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
elif tt == "partial_rows":
parsed_type = "a4_partial"
if current_jrxml.strip():
@@ -290,7 +298,8 @@ def run_agent(user_input: str):
f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
)
elif node_name in ("generate", "modify_jrxml", "correct_jrxml"):
elif node_name in ("generate", "modify_jrxml", "correct_jrxml",
"generate_skeleton", "refine_layout", "map_fields"):
jrxml = node_state.get("current_jrxml", "")
executed_nodes[-1]["detail"] = f"生成 {len(jrxml)} 字符 JRXML"