Files
koios/utils/neo4j-reset.py
Robert Helewka 7859264359 Add Neo4j schema initialization and validation scripts
- Introduced `neo4j-schema-init.py` for creating the foundational schema for the personal knowledge graph used by multiple AI assistants.
- Implemented functionality for creating constraints, indexes, and sample nodes, along with comprehensive testing of the schema.
- Added `neo4j-validate.py` to perform validation checks on the Neo4j knowledge graph, including constraints, indexes, sample nodes, relationships, and junk data detection.
- Enhanced logging for better traceability and debugging during schema initialization and validation processes.
2026-03-06 14:11:52 +00:00

157 lines
5.4 KiB
Python

"""
Neo4j Database Reset
====================
Wipes all nodes, relationships, constraints, and indexes from the database.
Use before re-running neo4j-schema-init.py for a clean slate.
Usage:
python neo4j-reset.py
python neo4j-reset.py --uri bolt://ariel.incus:7687
python neo4j-reset.py --uri bolt://ariel.incus:7687 --force
Environment Variables (optional):
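NEO4J_URI - Bolt URI (used when --uri is not given; prompts if unset, prompt default: bolt://localhost:7687)
NEO4J_USER - Username (used when --user is not given; prompts if unset, prompt default: neo4j)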
NEO4J_PASSWORD - Password (will prompt if not set)
"""
import argparse
import getpass
import os
import sys
from neo4j import GraphDatabase
from neo4j.exceptions import AuthError, ServiceUnavailable
def get_credentials(args):
    """Resolve the connection URI, username, and password.

    The URI and username are taken from the parsed CLI arguments first, then
    from the ``NEO4J_URI`` / ``NEO4J_USER`` environment variables, and finally
    from an interactive prompt whose empty answer falls back to a default.
    The password comes from ``NEO4J_PASSWORD`` or a hidden prompt.

    Args:
        args: Parsed arguments exposing ``uri`` and ``user`` attributes.

    Returns:
        A ``(uri, user, password)`` tuple.

    Raises:
        SystemExit: With status 1 when no password is supplied.
    """
    env = os.environ

    uri = args.uri or env.get("NEO4J_URI")
    if not uri:
        typed_uri = input("Neo4j URI [bolt://localhost:7687]: ").strip()
        uri = typed_uri if typed_uri else "bolt://localhost:7687"

    user = args.user or env.get("NEO4J_USER")
    if not user:
        typed_user = input("Neo4j username [neo4j]: ").strip()
        user = typed_user if typed_user else "neo4j"

    # The password is never read from the command line; only env or prompt.
    password = env.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
    if not password:
        print("ERROR: Password is required")
        sys.exit(1)

    return uri, user, password
def reset_database(driver):
    """Drop all constraints, indexes, and delete all data.

    Prints a before/after summary to stdout. Returns early, without changing
    anything, when the database has no nodes, no constraints, and no
    non-LOOKUP indexes.

    Args:
        driver: An open Neo4j driver. One session is opened from it and every
            statement is issued through ``session.run``.

    Returns:
        None.

    Raises:
        Exception: Errors from the counting, deleting, and verification
            queries propagate to the caller. A failure while dropping an
            individual constraint or index is printed as a warning instead.
    """
    non_lookup_indexes = (
        "SHOW INDEXES YIELD name, type WHERE type <> 'LOOKUP' RETURN name"
    )

    def quoted(name):
        # Names are interpolated into the statement text, so quote them as
        # Cypher identifiers; an embedded backtick is escaped by doubling it.
        # Without this, names containing '-', spaces, etc. are syntax errors.
        return "`" + name.replace("`", "``") + "`"

    def drop_all(session, kind, records):
        # Drop every named schema item of one kind; return how many succeeded.
        dropped = 0
        for record in records:
            name = record["name"]
            try:
                # consume() forces any server-side failure to surface here,
                # inside the try, rather than on a later unrelated query.
                session.run(f"DROP {kind} {quoted(name)} IF EXISTS").consume()
                dropped += 1
            except Exception as e:
                print(f" WARNING: Could not drop {kind.lower()} {name}: {e}")
        return dropped

    with driver.session() as session:
        # 1. Count what exists before wiping
        node_count = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
        rel_count = session.run("MATCH ()-[r]->() RETURN count(r) AS c").single()["c"]
        constraints = list(session.run("SHOW CONSTRAINTS YIELD name RETURN name"))
        indexes = list(session.run(non_lookup_indexes))

        print("\nCurrent database contents:")
        print(f" Nodes: {node_count}")
        print(f" Relationships:{rel_count}")
        print(f" Constraints: {len(constraints)}")
        print(f" Indexes: {len(indexes)}")

        if node_count == 0 and not constraints and not indexes:
            print("\nDatabase is already empty. Nothing to reset.")
            return

        # 2. Drop all constraints
        dropped_constraints = drop_all(session, "CONSTRAINT", constraints)
        print(f"\n Dropped {dropped_constraints} constraints")

        # 3. Drop all non-lookup indexes. IF EXISTS makes this a no-op for
        #    indexes that were already removed along with their constraint.
        dropped_indexes = drop_all(session, "INDEX", indexes)
        print(f" Dropped {dropped_indexes} indexes")

        # 4. Delete all nodes and relationships (batch for large DBs)
        total_deleted = 0
        while True:
            deleted = session.run(
                "MATCH (n) WITH n LIMIT 10000 DETACH DELETE n RETURN count(*) AS deleted"
            ).single()["deleted"]
            if deleted <= 0:
                break
            total_deleted += deleted
        print(f" Deleted {total_deleted} nodes (and their relationships)")

        # 5. Verify clean
        remaining = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
        remaining_constraints = len(list(session.run("SHOW CONSTRAINTS")))
        remaining_indexes = len(list(session.run(non_lookup_indexes)))

        print("\nAfter reset:")
        print(f" Nodes: {remaining}")
        print(f" Constraints: {remaining_constraints}")
        print(f" Indexes: {remaining_indexes}")

        if remaining == 0 and remaining_constraints == 0 and remaining_indexes == 0:
            print("\n✓ Database is clean. Ready for neo4j-schema-init.py")
        else:
            print("\n⚠ Some items remain — you may need to run this again")
def main():
    """Parse CLI arguments, connect to Neo4j, confirm, and reset the database.

    Exits with status 1 on authentication failure, when the server cannot be
    reached, or when the reset raises; exits with status 0 when the user
    declines the confirmation prompt. The confirmation is skipped when
    ``--force`` is given.

    Raises:
        SystemExit: On any of the exit paths described above.
    """
    parser = argparse.ArgumentParser(
        description="Reset Neo4j database — wipe all data, constraints, and indexes"
    )
    parser.add_argument("--uri", "-u", help="Neo4j Bolt URI")
    parser.add_argument("--user", "-U", help="Neo4j username")
    parser.add_argument("--force", "-f", action="store_true",
                        help="Skip confirmation prompt")
    args = parser.parse_args()

    uri, user, password = get_credentials(args)

    driver = None
    # One outer try/finally so the driver is closed on EVERY exit path,
    # including the sys.exit() calls for failed connections and a cancelled
    # confirmation (SystemExit still runs the finally clause).
    try:
        try:
            driver = GraphDatabase.driver(uri, auth=(user, password))
            # Test connection
            with driver.session() as session:
                session.run("RETURN 1").consume()
            print(f"✓ Connected to {uri}")
        except AuthError:
            print(f"✗ Authentication failed for {uri}")
            sys.exit(1)
        except ServiceUnavailable:
            print(f"✗ Cannot connect to {uri}")
            sys.exit(1)

        if not args.force:
            confirm = input(f"\n⚠ This will DELETE EVERYTHING in {uri}. Type 'yes' to confirm: ")
            if confirm.strip().lower() != "yes":
                print("Cancelled.")
                sys.exit(0)

        try:
            reset_database(driver)
        except Exception as e:
            # Top-level boundary: report the failure and signal it via exit code.
            print(f"ERROR: {e}")
            sys.exit(1)
    finally:
        if driver is not None:
            driver.close()
# Run the reset only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()