# (notebook cell marker "12" — extraction artifact, kept as a comment)
def openai_chat(system: str, user: str) -> str:
    """Call the OpenAI chat-completions API and return the reply text.

    Relies on module-level ``client`` (OpenAI SDK client) and
    ``OPENAI_MODEL`` being configured elsewhere in the file.

    Args:
        system: System-prompt content (role = "system").
        user: User-prompt content (role = "user").

    Returns:
        The assistant message content of the first choice.
    """
    resp = client.chat.completions.create(
        model=OPENAI_MODEL,
        # The API expects `messages` as a list of role/content dicts.
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.3,  # slightly creative but mostly deterministic
    )
    # `choices` is a sequence — index it, don't call it.
    return resp.choices[0].message.content
def heuristic_responder(context: str, question: str) -> str:
    """Offline fallback responder: build a structured answer from the memory context.

    Parses the memory context produced by ``MemoryEngine.build_context`` for:
      - episodic lessons   ("Lessons=a; b; c" lines),
      - pitfalls to avoid  ("Avoid=x; y" lines),
      - long-term memory   ("[LTM:kind] text (salience=...)" lines),
    and assembles a step-by-step reply without calling an LLM.

    Args:
        question: The user question (currently unused; kept for interface parity
            with ``openai_chat``).
    """
    lessons = re.findall(r"Lessons=(.*)", context)
    avoid = re.findall(r"Avoid=(.*)", context)
    # Must be a list, not a generator: it is iterated several times below.
    # NOTE(review): assumes LTM lines look like "[LTM:kind] text (salience=...)"
    # — confirm against MemoryEngine.build_context.
    ltm_lines = [ln for ln in context.splitlines() if ln.startswith("[LTM:")]

    steps = []
    if lessons:
        for chunk in lessons[:2]:  # cap at the two most relevant lesson lines
            for s in (x.strip() for x in chunk.split(";") if x.strip()):
                steps.append(s)
    for ln in ltm_lines:
        # Needle must be lowercase: `ln.lower()` is being searched.
        if "[ltm:procedure]" in ln.lower():
            proc = re.sub(r"^\[LTM:procedure\]\s*", "", ln, flags=re.I)
            proc = proc.split("(salience=")[0].strip()
            for part in (p.strip() for p in proc.split("|") if p.strip()):
                steps.append(part)
    # Cap at 8 steps; fall back to a generic plan when memory yields nothing.
    steps = steps[:8] if steps else [
        "Clarify the target outcome and constraints.",
        "Use semantic recall + episodic lessons to propose a plan.",
        "Execute, then store lessons learned.",
    ]

    pitfalls = []
    if avoid:
        for chunk in avoid[:2]:
            for s in (x.strip() for x in chunk.split(";") if x.strip()):
                pitfalls.append(s)
    pitfalls = pitfalls[:6]

    prefs = [ln for ln in ltm_lines if "[ltm:preference]" in ln.lower()]
    facts = [ln for ln in ltm_lines if "[ltm:fact]" in ln.lower() or "[ltm:constraint]" in ln.lower()]

    out = []
    out.append("Answer (memory-informed, offline fallback)\n")
    if prefs:
        out.append("Relevant preferences/constraints remembered:")
        for ln in (prefs + facts)[:6]:
            # Strip the "[LTM:kind] " prefix and the trailing "(salience=...)" suffix.
            out.append(" - " + ln.split("] ", 1)[1].split(" (salience=")[0].strip())
        out.append("")
    out.append("Recommended approach:")
    for i, s in enumerate(steps, 1):
        out.append(f" {i}. {s}")
    if pitfalls:
        out.append("\nPitfalls to avoid (from episodic traces):")
        for p in pitfalls:
            out.append(" - " + p)
    out.append("\n(If you add an API key, the same memory context will feed a stronger LLM for higher-quality responses.)")
    return "\n".join(out).strip()
class MemoryAugmentedAgent:
    """Thin agent wrapper that answers questions using a MemoryEngine for context."""

    def __init__(self, mem: MemoryEngine):
        self.mem = mem

    def answer(self, question: str) -> Dict[str, Any]:
        """Retrieve memory, generate a reply (LLM or offline fallback), log the turn.

        Returns:
            Dict with keys "reply" (str), "pack" (retrieval results), and
            "context" (the memory context string fed to the responder).
        """
        pack = self.mem.retrieve(question)
        context = self.mem.build_context(question, pack)
        system = (
            "You are a memory-augmented agent. Use the provided memory context.\n"
            "Prioritize:\n"
            "1) Episodic lessons (what worked before)\n"
            "2) Long-term facts/preferences/procedures\n"
            "3) Short-term conversation state\n"
            "Be concrete and stepwise. If memory conflicts, state the uncertainty."
        )
        if USE_OPENAI:
            reply = openai_chat(system=system, user=context + "\n\nUser question:\n" + question)
        else:
            reply = heuristic_responder(context=context, question=question)
        # Record both sides of the exchange in short-term memory.
        self.mem.st_add("user", question, kind="message")
        self.mem.st_add("assistant", reply, kind="message")
        return {"reply": reply, "pack": pack, "context": context}
# --- Bootstrap: create engine/agent and seed durable memories ---------------
mem = MemoryEngine()
agent = MemoryAugmentedAgent(mem)

# Pinned items survive consolidation/pruning; tags must be collections,
# not bare strings (("style") is just the string "style").
mem.ltm_add(kind="preference", text="Prefer concise, structured answers with steps and bullet points when helpful.", tags=["style"], pinned=True)
mem.ltm_add(kind="preference", text="Prefer solutions that run on Google Colab without extra setup.", tags=["environment"], pinned=True)
mem.ltm_add(kind="procedure", text="When building agent memory: embed items, store with salience/novelty policy, retrieve with hybrid semantic+episodic, and decay overuse to avoid repetition.", tags=["agent-memory"])
mem.ltm_add(kind="constraint", text="If no API key is available, provide a runnable offline fallback instead of failing.", tags=["robustness"], pinned=True)

# Seed one worked episode so retrieval has an episodic trace to draw on.
mem.episode_add(
    task="Build an agent memory layer for troubleshooting Python errors in Colab",
    constraints={"offline_ok": True, "single_notebook": True},
    plan=[
        "Capture short-term chat context",
        "Store durable constraints/preferences in long-term vector memory",
        "After solving, extract lessons into episodic traces",
        "On new tasks, retrieve top episodic lessons + semantic facts",
    ],
    actions=[
        {"type": "analysis", "detail": "Identified recurring failure: missing installs and version mismatches."},
        {"type": "action", "detail": "Added pip install block + minimal fallbacks."},
        {"type": "action", "detail": "Added memory policy: pin constraints, drop low-salience items."},
    ],
    result="Notebook became robust: runs with or without external keys; troubleshooting quality improved with episodic lessons.",
    outcome_score=0.90,
    lessons=[
        "Always include a pip install cell for non-standard deps.",
        "Pin hard constraints (e.g., offline fallback) into long-term memory.",
        "Store a post-task 'lesson list' as an episodic trace for reuse.",
    ],
    failure_modes=[
        "Assuming an API key exists and crashing when absent.",
        "Storing too much noise into long-term memory causing irrelevant recall context.",
    ],
    tags=["colab", "robustness", "memory"],
)
print("✅ Memory engine initialized.")
print(f" LTM items: {len(mem.ltm)} | Episodes: {len(mem.episodes)} | ST items: {len(mem.short_term)}")
# --- Demo: two memory-informed Q&A turns ------------------------------------
q1 = "I want to build memory for an agent in Colab. What should I store and how do I retrieve it?"
out1 = agent.answer(q1)
print("\n" + "=" * 90)
print("Q1 REPLY\n")
# `answer` returns a dict — subscript it; truncate long replies for display.
print(out1["reply"][:1800])

q2 = "How do I avoid my agent repeating the same memory over and over?"
out2 = agent.answer(q2)
print("\n" + "=" * 90)
print("Q2 REPLY\n")
print(out2["reply"][:1800])
def simple_outcome_eval(text: str) -> float:
    """Score a reply by coverage of memory-policy keywords.

    Args:
        text: Reply text to evaluate (case-insensitive keyword match).

    Returns:
        Fraction of keywords present, clipped to [0.0, 1.0].
    """
    keywords = ("decay", "usage", "penalty", "novelty", "prune", "retrieve", "episodic", "semantic")
    lowered = text.lower()  # hoist: lowercase once, not per keyword
    hits = sum(1 for kw in keywords if kw in lowered)
    # Denominator derived from the keyword set so it can't drift out of sync.
    return float(np.clip(hits / len(keywords), 0.0, 1.0))
# --- Score the second reply, store it as an episode, then consolidate -------
score2 = simple_outcome_eval(out2["reply"])
mem.episode_add(
    task="Prevent repetitive recall in a memory-augmented agent",
    constraints={"must_be_simple": True, "runs_in_colab": True},
    plan=[
        "Track usage counts per memory item",
        "Apply usage-based penalty during ranking",
        "Boost novelty during storage to reduce duplicates",
        "Optionally prune low-salience memories",
    ],
    actions=[
        {"type": "design", "detail": "Added usage-based penalty 1/(1+alpha*usage)."},
        {"type": "design", "detail": "Used novelty = 1 - max_similarity at store time."},
    ],
    result=out2["reply"][:600],  # store a distilled excerpt, not the full reply
    outcome_score=score2,
    lessons=[
        "Penalize overused memories during ranking (usage decay).",
        "Enforce novelty threshold at storage time to prevent duplicates.",
        "Keep episodic lessons distilled to avoid bloated recall context.",
    ],
    failure_modes=[
        "No usage tracking, causing one high-similarity memory to dominate forever.",
        "Storing raw chat logs as LTM instead of distilled summaries.",
    ],
    tags=["ranking", "decay", "policy"],
)
cons = mem.consolidate()
print("\n" + "=" * 90)
print("CONSOLIDATION RESULT:", cons)
print("\n" + "=" * 90)
print("LTM (top rows):")
display(mem.ltm_df().head(12))
print("\n" + "=" * 90)
print("EPISODES (top rows):")
display(mem.episodes_df().head(12))
def debug_retrieval(query: str):
    """Run retrieval for `query` and expose the intermediate ranking results.

    Returns:
        Tuple of (context string, semantic-hits DataFrame, episodic-hits DataFrame).
    """
    pack = mem.retrieve(query)
    ctx = mem.build_context(query, pack)
    sem = []
    # pack holds (id, score) pairs; look each id up in the LTM store.
    for mid, sc in pack["semantic_scored"]:
        it = mem.ltm[mid]
        sem.append({"mem_id": mid, "score": sc, "kind": it.kind, "salience": it.salience, "usage": it.usage, "text": it.text[:160]})
    ep = []
    for eid, sc in pack["episodic_scored"]:
        e = mem.episodes[eid]
        ep.append({"ep_id": eid, "score": sc, "outcome": e.outcome_score, "task": e.task[:140], "lessons": " | ".join(e.lessons[:4])})
    return ctx, pd.DataFrame(sem), pd.DataFrame(ep)
# --- Demo: inspect what retrieval actually surfaced -------------------------
print("\n" + "=" * 90)
ctx, sem_df, ep_df = debug_retrieval("How do I design an agent memory policy for storage and retrieval?")
print(ctx[:1600])  # truncate the context for display
print("\nTop semantic hits:")
display(sem_df)
print("\nTop episodic hits:")
display(ep_df)
print("\n✅ Done. You now have working short-term, long-term vector, and episodic memory with storage/retrieval policies in one Colab snippet.")
