{"id":"01KFF0H3YRP9ZSM033AM0QJ47H","cid":"bafkreieep6xoxr6yu3g2pylz2hw72tny3a6jvc3rurywt5f6tjeny44z4y","type":"agent","properties":{"_profile_version":"v1","actions_required":["file:view","file:create","file:update","entity:view","entity:create","entity:update"],"description":"Extracts hierarchical document structure from any text-based file (txt, md, rst, etc.) using LLMs and creates entities with relationships","endpoint":"https://structure-extraction.arke.institute","endpoint_verified_at":"2026-01-21T00:51:07.079Z","input_schema":{"properties":{"file_id":{"description":"Source text-based file entity to extract structure from (txt, md, rst, etc.)","type":"string"},"options":{"description":"Extraction options","properties":{"custom_prompt":{"description":"Additional instructions for the LLM","type":"string"},"max_chunk_tokens":{"description":"Maximum tokens per chunk (default: 1024)","type":"number"},"model_name":{"description":"LLM model to use (default: meta-llama/Llama-3.3-70B-Instruct-Turbo)","type":"string"},"on_behalf_of":{"description":"Entity ID to act on behalf of (for permission context)","type":"string"}},"type":"object"}},"required":["file_id"],"type":"object"},"label":"Structure Extraction","output_description":"Creates a tree of entities representing the document's logical structure. The root entity represents the whole document (e.g. a 'book' or 'article'). Below it are structural divisions — parts, chapters, sections — determined by the LLM based on the document's actual organization. The specific types and depth depend on the document. Leaf-level sections are further split into 'chunk' entities (~1024 tokens each) suitable for embedding and search. Only leaf entities (those with no children) carry a 'text' property containing their content. Container entities do not have text — to read the document, collect text from the leaf entities in order. \nEvery entity has a 'source_file' property pointing back to the input file, and 'start_line'/'end_line' properties indicating which lines of the source file it covers.","output_relationships":["Each child entity has an 'in' relationship pointing to its parent","Each entity has an 'extractedFrom' relationship pointing to the source file","Siblings are linked with 'prev' and 'next' relationships in document order","Each parent has 'contains' relationships pointing to all its direct children","To traverse the tree: start from the source file, follow 'contains' to find root entities, then recursively follow 'contains' downward"],"output_tree_example":"book 'Pride and Prejudice'\n├── intro 'Title Page' (lines 1-5)\n├── chapter 'Chapter 1' (lines 6-120)\n│   ├── section 'Opening Scene' (lines 6-60)\n│   │   ├── chunk 1 (lines 6-30, has text)\n│   │   └── chunk 2 (lines 31-60, has text)\n│   └── section 'First Meeting' (lines 61-120)\n│       ├── chunk 3 (lines 61-90, has text)\n│       └── chunk 4 (lines 91-120, has text)\n└── chapter 'Chapter 2' (lines 121-250)\n    └── ...","status":"active"},"relationships":[{"peer":"01KFF0H1KSR4SHHDQ7T2HXQEK6","peer_type":"collection","predicate":"collection"}],"ver":7,"created_at":"2026-01-21T00:51:02.008Z","ts":"2026-01-30T02:42:30.325Z","edited_by":{"method":"manual","user_id":"01KDZS52M5F9XS0ZPZQQXGPC9A"}}