@dataclass
class MemoryItem:
    """A single entry of the synthetic memory bank.

    Attributes:
        memory_id: Integer identifier of the entry.
        matter: Topic label the entry belongs to (e.g. ``"robotics"``);
            filler entries use ``"noise"``.
        entity: Name of the entity the entry is about, or ``"none"`` for
            filler entries.
        slot: Name of the fact stored in the entry (e.g. ``"battery"``), or
            ``"distractor"`` when the entry carries no answer.
        worth: Value of the fact (e.g. ``"18 hours"``), or ``"n/a"`` for
            distractors.
        text: Natural-language sentence that is embedded and retrieved.
    """

    memory_id: int
    matter: str
    entity: str
    slot: str
    worth: str
    # Named ``text`` because the rest of the file reads ``m.text``.
    text: str
def build_memory_bank() -> list[MemoryItem]:
    """Build the synthetic memory bank used for retrieval experiments.

    For every entity, each fact is rendered once per phrasing template, so the
    same (entity, slot, value) triple appears under several wordings. Each
    entity also receives answer-free distractor sentences, and a handful of
    entity-free noise sentences are appended at the end.

    Returns:
        The memory items in creation order. ``memory_id`` equals the item's
        position in the returned list (0, 1, 2, ...).
    """
    entities = [
        {
            "entity": "Astra",
            "topic": "robotics",
            "facts": {
                "battery": "18 hours",
                "sensor": "LiDAR",
                "country": "Japan",
                "release_year": "2023",
                "specialty": "warehouse navigation",
            },
        },
        {
            "entity": "Orion",
            "topic": "astronomy",
            "facts": {
                "telescope": "infrared array",
                "country": "Chile",
                "discovery_year": "2019",
                "target": "exoplanet atmospheres",
                "aperture": "8 meters",
            },
        },
        {
            "entity": "Vita",
            "topic": "biomedicine",
            "facts": {
                "compound": "VX-17",
                "trial_phase": "Phase II",
                "country": "Canada",
                "target": "inflammatory markers",
                "delivery": "oral capsule",
            },
        },
        {
            "entity": "Nimbus",
            "topic": "climate",
            "facts": {
                "satellite": "polar orbiter",
                "country": "Norway",
                "launch_year": "2022",
                "instrument": "microwave radiometer",
                "mission": "sea ice monitoring",
            },
        },
        {
            "entity": "Atlas",
            "topic": "logistics",
            "facts": {
                "fleet_size": "240 trucks",
                "hub": "Muscat",
                "software": "predictive routing",
                "fuel_policy": "hybrid-first",
                "region": "GCC",
            },
        },
        {
            "entity": "Lumos",
            "topic": "materials",
            "facts": {
                "alloy": "Ti-6Al-4V",
                "process": "laser sintering",
                "density": "4.43 g/cm3",
                "country": "Germany",
                "use_case": "aerospace brackets",
            },
        },
        {
            "entity": "Cedar",
            "topic": "agriculture",
            "facts": {
                "crop": "wheat",
                "irrigation": "drip control",
                "country": "India",
                "yield_gain": "12 percent",
                "soil_sensor": "capacitive probe",
            },
        },
        {
            "entity": "Pulse",
            "topic": "healthcare",
            "facts": {
                "device": "ECG patch",
                "battery": "7 days",
                "country": "USA",
                "connectivity": "Bluetooth Low Energy",
                "use_case": "arrhythmia screening",
            },
        },
    ]
    # Placeholders are {entity}, {topic}, {slot}, {value}; only the first
    # template uses {topic}, and str.format ignores unused keyword arguments.
    phrasing_templates = [
        "{entity} in {topic} uses {value} for {slot}.",
        "The {slot} associated with {entity} is {value}.",
        "{entity} has {slot}: {value}.",
        "For {entity}, the recorded {slot} is {value}.",
        "Reference note: {entity} -> {slot} = {value}.",
    ]
    distractor_templates = [
        "{entity} was discussed in a briefing about cross-domain innovation.",
        "{entity} has been compared with several other projects in recent reports.",
        "A summary note mentions {entity} among notable initiatives.",
        "{entity} appears in a high-level update without technical details.",
        "Stakeholders reviewed {entity} in a strategic planning session.",
    ]
    extra_noise = [
        "General note: system maintenance occurred on Tuesday.",
        "A committee discussed budget timelines and operational readiness.",
        "The archive includes summaries of projects across multiple departments.",
        "No relevant technical value is stated in this memory.",
        "A status update mentioned partnerships and future opportunities.",
        "An unrelated note references shipping delays and staffing changes.",
        "Background memo: the team reviewed dashboards and reporting cadence.",
        "This memory contains no answer-bearing facts.",
    ]

    memory_bank = []

    def add_memory(matter: str, entity: str, slot: str, worth: str, text: str) -> None:
        # The id is the insertion index, which keeps ids unique and sequential.
        memory_bank.append(
            MemoryItem(
                memory_id=len(memory_bank),
                matter=matter,
                entity=entity,
                slot=slot,
                worth=worth,
                text=text,
            )
        )

    for item in entities:
        entity = item["entity"]
        topic = item["topic"]

        # One memory per (fact, phrasing) pair.
        for slot, value in item["facts"].items():
            for template in phrasing_templates:
                text = template.format(entity=entity, topic=topic, slot=slot, value=value)
                add_memory(topic, entity, slot, value, text)

        # NOTE(review): distractors are emitted once per entity, after all of
        # its facts; the nesting was reconstructed from data flow -- confirm.
        for template in distractor_templates:
            add_memory(topic, entity, "distractor", "n/a", template.format(entity=entity))

    # Entity-free filler sentences.
    for text in extra_noise:
        add_memory("noise", "none", "distractor", "n/a", text)

    return memory_bank
# Materialise the memory bank once at module level, then embed the sentence of
# every memory item in bank order.
memory_bank = build_memory_bank()
memory_texts = [memory.text for memory in memory_bank]
# NOTE(review): embed_texts is defined outside this section; presumably it
# returns one embedding per input text, in the same order -- confirm.
memory_embeddings = embed_texts(memory_texts)
def build_queries(memory_bank: list[MemoryItem]) -> list[dict[str, Any]]:
    """Create one natural-language question per answer-bearing memory item.

    Items whose ``slot`` is ``"distractor"`` are skipped. For every remaining
    item a question pattern is drawn with ``random.choice`` and filled with the
    item's entity and its slot name (underscores shown as spaces). The result
    is shuffled in place with ``random.shuffle``, so both wording and order
    depend on the state of the global ``random`` generator.

    Args:
        memory_bank: Memory items to derive queries from.

    Returns:
        A list of dicts with the keys ``query_id``, ``question``, ``entity``,
        ``slot``, ``gold_value``, ``gold_memory_id``, ``gold_text`` and
        ``matter``. ``query_id`` is assigned sequentially from 0 before the
        shuffle, so ids are unique but not in list order.
    """
    patterns = [
        "What is the {slot} of {entity}?",
        "Which {slot} does {entity} have?",
        "Tell me the {slot} for {entity}.",
        "Can you recall the {slot} associated with {entity}?",
        "What was recorded as the {slot} of {entity}?",
    ]
    queries = []
    for m in memory_bank:
        if m.slot == "distractor":
            # Distractors carry no answer, so no question targets them.
            continue
        # Slot keys such as "release_year" read better as "release year".
        readable_slot = m.slot.replace("_", " ")
        question = random.choice(patterns).format(slot=readable_slot, entity=m.entity)
        queries.append({
            "query_id": len(queries),
            "question": question,
            "entity": m.entity,
            "slot": m.slot,
            "gold_value": m.worth,
            "gold_memory_id": m.memory_id,
            "gold_text": m.text,
            "matter": m.matter,
        })
    random.shuffle(queries)
    return queries
# Derive the evaluation queries from the memory bank and embed their text.
queries = build_queries(memory_bank)
# build_queries stores the question text under the "question" key.
query_texts = [q["question"] for q in queries]
# NOTE(review): embed_texts is defined outside this section; presumably it
# returns one embedding per input text, in the same order -- confirm.
query_embeddings = embed_texts(query_texts)
