- Introduced `neo4j-schema-init.py` for creating the foundational schema for the personal knowledge graph used by multiple AI assistants. - Implemented functionality for creating constraints, indexes, and sample nodes, along with comprehensive testing of the schema. - Added `neo4j-validate.py` to perform validation checks on the Neo4j knowledge graph, including constraints, indexes, sample nodes, relationships, and junk data detection. - Enhanced logging for better traceability and debugging during schema initialization and validation processes.
157 lines
5.4 KiB
Python
157 lines
5.4 KiB
Python
"""
|
|
Neo4j Database Reset
====================
Wipes all nodes, relationships, constraints, and indexes from the database.
Use before re-running neo4j-schema-init.py for a clean slate.

Usage:
    python neo4j-reset.py
    python neo4j-reset.py --uri bolt://ariel.incus:7687
    python neo4j-reset.py --uri bolt://ariel.incus:7687 --force

Environment Variables (optional):
    NEO4J_URI      - Bolt URI (prompted for if unset; default: bolt://localhost:7687)
    NEO4J_USER     - Username (prompted for if unset; default: neo4j)
    NEO4J_PASSWORD - Password (will prompt if not set)
|
|
"""
|
|
|
|
import argparse
|
|
import getpass
|
|
import os
|
|
import sys
|
|
from neo4j import GraphDatabase
|
|
from neo4j.exceptions import AuthError, ServiceUnavailable
|
|
|
|
|
|
def get_credentials(args):
    """Resolve the connection URI, username, and password.

    The URI and username are taken from the parsed command-line arguments
    first, then from the ``NEO4J_URI`` / ``NEO4J_USER`` environment
    variables, and finally from an interactive prompt whose empty answer
    selects the built-in default. The password is read from
    ``NEO4J_PASSWORD`` or, failing that, from a hidden prompt.

    Args:
        args: Parsed arguments exposing ``uri`` and ``user`` attributes.

    Returns:
        A ``(uri, user, password)`` tuple.

    Exits the process with status 1 when no password is supplied.
    """
    # Connection URI: CLI flag > environment > prompt (blank -> default).
    uri = args.uri if args.uri else os.environ.get("NEO4J_URI")
    if not uri:
        typed_uri = input("Neo4j URI [bolt://localhost:7687]: ").strip()
        uri = typed_uri if typed_uri else "bolt://localhost:7687"

    # Username: same precedence as the URI.
    user = args.user if args.user else os.environ.get("NEO4J_USER")
    if not user:
        typed_user = input("Neo4j username [neo4j]: ").strip()
        user = typed_user if typed_user else "neo4j"

    # Password: never accepted on the command line, only env or hidden prompt.
    password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
    if password:
        return uri, user, password

    print("ERROR: Password is required")
    sys.exit(1)
|
|
|
|
|
|
def _quote_identifier(name):
    """Return *name* backtick-quoted so it can be spliced into Cypher safely."""
    # Cypher escapes a backtick inside a quoted identifier by doubling it.
    return "`" + str(name).replace("`", "``") + "`"


def reset_database(driver):
    """Drop all constraints, indexes, and delete all data.

    Prints a summary of the database contents, drops every constraint and
    every non-LOOKUP index, deletes all nodes (with their relationships) in
    batches of 10000, and finally prints what is left.

    Args:
        driver: An object whose ``session()`` returns a context-managed
            session exposing ``run(query)``.

    Returns:
        None. Returns early when there are no nodes, constraints, or
        non-LOOKUP indexes.

    Failures while dropping an individual constraint or index are reported
    as warnings and do not abort the reset; any other error propagates.
    """
    index_query = "SHOW INDEXES YIELD name, type WHERE type <> 'LOOKUP' RETURN name"

    with driver.session() as session:
        # 1. Count what exists before wiping
        node_count = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
        rel_count = session.run("MATCH ()-[r]->() RETURN count(r) AS c").single()["c"]
        constraints = [
            record["name"]
            for record in session.run("SHOW CONSTRAINTS YIELD name RETURN name")
        ]
        indexes = [record["name"] for record in session.run(index_query)]

        print("\nCurrent database contents:")
        print(f" Nodes: {node_count}")
        print(f" Relationships:{rel_count}")
        print(f" Constraints: {len(constraints)}")
        print(f" Indexes: {len(indexes)}")

        if node_count == 0 and not constraints and not indexes:
            print("\nDatabase is already empty. Nothing to reset.")
            return

        # 2. Drop all constraints
        dropped_constraints = 0
        for name in constraints:
            try:
                # Quote the name (it may contain characters that are not
                # valid in a bare identifier) and consume the result so a
                # failure is raised here rather than on a later statement.
                session.run(
                    f"DROP CONSTRAINT {_quote_identifier(name)} IF EXISTS"
                ).consume()
                dropped_constraints += 1
            except Exception as e:
                print(f" WARNING: Could not drop constraint {name}: {e}")
        print(f"\n Dropped {dropped_constraints} constraints")

        # 3. Drop all non-lookup indexes
        if dropped_constraints:
            # Re-list after the constraint pass: indexes that backed a dropped
            # constraint should already be gone, and counting a no-op
            # "DROP INDEX ... IF EXISTS" for them would inflate the total.
            indexes = [record["name"] for record in session.run(index_query)]

        dropped_indexes = 0
        for name in indexes:
            try:
                session.run(
                    f"DROP INDEX {_quote_identifier(name)} IF EXISTS"
                ).consume()
                dropped_indexes += 1
            except Exception as e:
                print(f" WARNING: Could not drop index {name}: {e}")
        print(f" Dropped {dropped_indexes} indexes")

        # 4. Delete all nodes and relationships (batch for large DBs)
        total_deleted = 0
        while True:
            deleted = session.run(
                "MATCH (n) WITH n LIMIT 10000 DETACH DELETE n RETURN count(*) AS deleted"
            ).single()["deleted"]
            if not deleted:
                break
            total_deleted += deleted
        print(f" Deleted {total_deleted} nodes (and their relationships)")

        # 5. Verify clean
        remaining = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
        remaining_constraints = len(list(session.run("SHOW CONSTRAINTS")))
        remaining_indexes = len(list(session.run(index_query)))

        print("\nAfter reset:")
        print(f" Nodes: {remaining}")
        print(f" Constraints: {remaining_constraints}")
        print(f" Indexes: {remaining_indexes}")

        if remaining == 0 and remaining_constraints == 0 and remaining_indexes == 0:
            print("\n✓ Database is clean. Ready for neo4j-schema-init.py")
        else:
            print("\n⚠ Some items remain — you may need to run this again")
|
|
|
|
|
|
def main():
    """Parse arguments, connect, confirm, and reset the database.

    Exits with status 1 when authentication fails, the server cannot be
    reached, or the reset raises; exits with status 0 when the user declines
    the confirmation prompt. ``--force`` skips that prompt.

    The driver is closed on every exit path, including the early exits for
    connection failures and a cancelled confirmation.
    """
    parser = argparse.ArgumentParser(
        description="Reset Neo4j database — wipe all data, constraints, and indexes"
    )
    parser.add_argument("--uri", "-u", help="Neo4j Bolt URI")
    parser.add_argument("--user", "-U", help="Neo4j username")
    parser.add_argument("--force", "-f", action="store_true",
                        help="Skip confirmation prompt")
    args = parser.parse_args()

    uri, user, password = get_credentials(args)

    driver = None
    try:
        try:
            driver = GraphDatabase.driver(uri, auth=(user, password))
            # Test connection
            with driver.session() as session:
                session.run("RETURN 1").consume()
            print(f"✓ Connected to {uri}")
        except AuthError:
            print(f"✗ Authentication failed for {uri}")
            sys.exit(1)
        except ServiceUnavailable:
            print(f"✗ Cannot connect to {uri}")
            sys.exit(1)

        if not args.force:
            confirm = input(f"\n⚠ This will DELETE EVERYTHING in {uri}. Type 'yes' to confirm: ")
            if confirm.strip().lower() != "yes":
                print("Cancelled.")
                sys.exit(0)

        try:
            reset_database(driver)
        except Exception as e:
            print(f"ERROR: {e}")
            sys.exit(1)
    finally:
        # SystemExit raised by the sys.exit() calls above also passes through
        # here, so the driver is released on failure and cancel paths too.
        if driver is not None:
            driver.close()
|
|
|
|
|
|
# Script entry point: run the reset only when executed directly, not on import.
if __name__ == "__main__":
    sys.exit(main())
|