feat: layered precise generation for A4 report images
Adds a 3-phase pipeline that avoids LLM prompt overflow caused by too many OCR elements:
Phase 1 (generate_skeleton): compressed layout schema → skeleton JRXML
Phase 2 (refine_layout): sampled coordinates → pixel-level position tuning
Phase 3 (map_fields): OCR field names → replace $F{field_N} placeholders
The pipeline is triggered only for the initial_generation intent, and only when layout_schema.total_rows > 0.
Text requests and all other intents are unaffected (zero behavior change).
This commit is contained in:
@@ -80,6 +80,9 @@ NODE_LABELS = {
|
||||
"handle_undo": "↩ 撤销操作",
|
||||
"handle_reset": "🔄 重置会话",
|
||||
"save_session": "💾 保存会话",
|
||||
"generate_skeleton": "🏗 生成骨架",
|
||||
"refine_layout": "📐 精调布局",
|
||||
"map_fields": "🏷 映射字段",
|
||||
}
|
||||
|
||||
INTENT_LABELS = {
|
||||
@@ -133,6 +136,11 @@ def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
|
||||
if tt == "full_a4":
|
||||
parsed_text = layout["description"]
|
||||
parsed_type = "a4_template"
|
||||
# 存储布局 schema 供分层精确生成使用
|
||||
from backend.layout_analyzer import extract_layout_schema
|
||||
schema = extract_layout_schema(layout)
|
||||
st.session_state.agent_state["layout_schema"] = schema
|
||||
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
|
||||
elif tt == "partial_rows":
|
||||
parsed_type = "a4_partial"
|
||||
if current_jrxml.strip():
|
||||
@@ -290,7 +298,8 @@ def run_agent(user_input: str):
|
||||
f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
|
||||
)
|
||||
|
||||
elif node_name in ("generate", "modify_jrxml", "correct_jrxml"):
|
||||
elif node_name in ("generate", "modify_jrxml", "correct_jrxml",
|
||||
"generate_skeleton", "refine_layout", "map_fields"):
|
||||
jrxml = node_state.get("current_jrxml", "")
|
||||
executed_nodes[-1]["detail"] = f"生成 {len(jrxml)} 字符 JRXML"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user