Or are you saying the agent itself would decide when to cache? You could technically implement that today as a tool the agent can call:
async def set_cache_point(ctx: Context) -> None:
    """Use this when a very large message is introduced into the chat history that should be cached."""
    # NOTE(review): the docstring above is presumably surfaced to the agent as
    # the tool description, so its wording is kept verbatim -- confirm before
    # editing it.

    # The memory object is read from the workflow context under the "memory" key.
    memory = await ctx.get("memory")

    # Use the async accessor: the synchronous ``get_all()`` returns a plain
    # list, and awaiting a list raises ``TypeError``.
    # Assumes ``memory`` follows the LlamaIndex memory interface
    # (``aget_all`` / ``aset``) -- TODO confirm against the installed version.
    messages = await memory.aget_all()

    for message in messages:
        # Placeholder value from the original sketch: replace ``...`` with the
        # provider-specific cache-control payload.
        # NOTE(review): this tags *every* message; if the provider limits the
        # number of cache breakpoints, tag only the relevant message(s).
        message.additional_kwargs["cache_control"] = ...

    # Persist the annotated history with the async setter so the coroutine
    # never calls the blocking variant from inside the running event loop.
    await memory.aset(messages)

    # Write the memory object back under the same context key.
    await ctx.set("memory", memory)
# (other setup elided in the original sketch)
...

# Register ``set_cache_point`` alongside the other tools so the agent itself
# can decide when to invoke it. The ``...`` entries are placeholders for the
# remaining tools and constructor arguments.
agent = AgentWorkflow.from_tools_or_functions(
    [..., set_cache_point],
    ...,
)