feat(init): add preview_changes method to show read-only schema init diff

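Adds preview_changes method to utils/neo4j-schema-init.py. Performs
read-only queries against the live database to compare current state
against the v2.3.0 schema spec. Reports expected constraints, indexes,
and sample nodes/relationships by team.

Also adds an interactive action menu (prompt_action: full / schema only /
tests only / quit), reworks get_credentials in both changed scripts to
prompt for each value with its default shown and to ask for confirmation
(password masked) before connecting, and adds utils/pyproject.toml.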
This commit is contained in:
2026-05-20 07:34:05 -04:00
parent 812473ad97
commit 3e68764368
3 changed files with 312 additions and 48 deletions

View File

@@ -806,34 +806,167 @@ Full schema: docs/neo4j-unified-schema.md (v2.3.0)
print(schema_doc) print(schema_doc)
logger.info("Schema documentation displayed") logger.info("Schema documentation displayed")
def preview_changes(self):
    """
    Print what a full init run WOULD create, without writing anything.

    Reads the live database's current state and prints it next to the
    totals this script expects for the v2.3.0 schema:

    * constraints - number present vs. EXPECTED_CONSTRAINTS. The
      comparison is by count only, so the "would be created" figure is
      an estimate, not a per-constraint diff.
    * indexes - number of non-LOOKUP indexes currently present.
    * data - total node and relationship counts.
    * sample data - the sample nodes (grouped by team) and the sample
      relationships a full run would MERGE.

    Only SHOW CONSTRAINTS, SHOW INDEXES and MATCH ... RETURN count(...)
    statements are issued; nothing in this method writes to the database.

    Returns:
        None. All output goes to stdout via print().
    """
    # Expected constraint total for the v2.3.0 schema spec.
    # NOTE(review): presumably mirrors the create_* methods elsewhere in
    # this script; confirm whenever the schema changes.
    EXPECTED_CONSTRAINTS = 79

    # Visible separator line. The previous `"" * 60` always evaluated to
    # the empty string, so the banner rules rendered as blank lines.
    rule = "=" * 60

    sample_nodes_by_team = [
        ("Universal", ["Person:user_main", "Location:location_home"]),
        ("Personal — Nate", ["Trip:trip_sample_2025"]),
        ("Personal — Hypatia", ["Book:book_meditations_aurelius",
                                "Topic:topic_stoicism"]),
        ("Personal — Watson", ["Goal:goal_sample_2025",
                               "EmotionalMemory:memory_sample"]),
        ("Personal — Garth", ["Account:account_tfsa_sample"]),
        ("Personal — Shawn", ["Contact:contact_sample_personal",
                              "Communication:comm_sample"]),
        ("Personal — Cristiano", ["Team:team_arsenal"]),
        ("Work", ["Client:client_sample_corp",
                  "Skill:skill_cx_strategy",
                  "Topic:topic_ai_in_cx"]),
        ("Engineering — Scotty", ["Infrastructure:infra_neo4j_prod"]),
    ]
    sample_rels = [
        "(Person:user_main)-[:SUPPORTS]->(Team:team_arsenal)",
        "(Person:user_main)-[:COMPLETED]->(Book:book_meditations_aurelius)",
        "(Person:user_main)-[:PURSUING]->(Goal:goal_sample_2025)",
        "(Book:book_meditations_aurelius)-[:EXPLORES]->(Topic:topic_stoicism)",
        "(Person:user_main)-[:OWNS]->(Account:account_tfsa_sample)",
        "(Person:user_main)-[:HAD]->(Communication:comm_sample)",
        "(Communication:comm_sample)-[:WITH]->(Contact:contact_sample_personal)",
    ]

    # Derive the headline totals from the lists that are printed below so
    # the summary cannot drift from the itemized breakdown.
    # NOTE(review): the list above has 15 entries while the old hard-coded
    # constant said 14 — confirm which one matches create_sample_nodes().
    expected_sample_nodes = sum(len(nodes) for _, nodes in sample_nodes_by_team)
    expected_sample_rels = len(sample_rels)

    print()
    print(rule)
    print(" DRY RUN — Preview of what a full init would create")
    print(rule)

    with self.driver.session() as session:
        # ── Constraints ──────────────────────────────────────────
        existing_constraint_count = len(list(
            session.run("SHOW CONSTRAINTS YIELD name RETURN name")
        ))
        # Clamp at zero: the database may hold more constraints than the
        # spec expects.
        constraints_to_add = max(
            0, EXPECTED_CONSTRAINTS - existing_constraint_count
        )
        print(f"\n Constraints: {existing_constraint_count} present / "
              f"{EXPECTED_CONSTRAINTS} expected")
        print(f" {constraints_to_add} would be created "
              f"(or skipped via IF NOT EXISTS)")

        # ── Indexes ──────────────────────────────────────────────
        # Index count varies as the schema evolves; just report current.
        existing_indexes = list(session.run(
            "SHOW INDEXES YIELD name, type WHERE type <> 'LOOKUP' RETURN name"
        ))
        print(f"\n Indexes: {len(existing_indexes)} present "
              f"(includes constraint-backed)")
        print(" ~30 additional named indexes would be "
              "created (or skipped via IF NOT EXISTS)")

        # ── Total node / relationship counts ─────────────────────
        total_nodes = session.run(
            "MATCH (n) RETURN count(n) AS c"
        ).single()["c"]
        total_rels = session.run(
            "MATCH ()-[r]->() RETURN count(r) AS c"
        ).single()["c"]
        print(f"\n Current data: {total_nodes} nodes, "
              f"{total_rels} relationships")
        print(f" {expected_sample_nodes} sample nodes + "
              f"{expected_sample_rels} sample rels would be MERGEd")

    # ── Node-type breakdown by team ──────────────────────────────
    # Static listing; no database access is needed from here on.
    print("\n Sample data by team (a full run with --skip-samples=false):")
    for team, samples in sample_nodes_by_team:
        print(f" {team}:")
        for sample in samples:
            print(sample)

    print("\n Sample relationships:")
    for rel in sample_rels:
        print(rel)

    print()
    print(" All writes use MERGE + IF NOT EXISTS, so re-running is")
    print(" idempotent. Nothing has been changed by this dry run.")
    print(rule)
    print()
def _mask_password(pw):
    """
    Mask a password for display in the connection summary.

    Args:
        pw: The password string; may be empty or None.

    Returns:
        "(empty)" for a falsy value. For passwords shorter than 8
        characters, a run of "*" with the same length and nothing else.
        For longer passwords, the first and last character with the
        middle replaced by "*", followed by " (N chars)".
    """
    # Echoing both end characters of a short password discloses most of
    # it (2 of 3 characters for a 3-character password), so the ends are
    # only shown once the password is long enough for that to be a small
    # fraction of the secret.
    min_reveal_len = 8

    if not pw:
        return "(empty)"

    length = len(pw)
    if length < min_reveal_len:
        return "*" * length
    return f"{pw[0]}{'*' * (length - 2)}{pw[-1]} ({length} chars)"
def get_credentials(args): def get_credentials(args):
""" """
Collect Neo4j credentials from environment variables, CLI args, or prompts. Collect Neo4j credentials by prompting for each value sequentially.
Priority: CLI args > Environment variables > Interactive prompts
For each of URI, username, password: show the current default (from CLI
arg, env var, or built-in fallback) in brackets; user hits Enter to
accept or types a new value to override. Password prompt uses getpass
so it isn't echoed and doesn't land in shell history.
Finally, print a summary (password masked) and ask for final confirmation.
If the user declines, exit cleanly without touching the database.
Priority for each default value:
CLI arg > Environment variable > Built-in default
""" """
print()
print("" * 60)
print(" Neo4j Connection")
print("" * 60)
# URI # URI
uri = args.uri or os.environ.get("NEO4J_URI") uri_default = args.uri or os.environ.get("NEO4J_URI") or "bolt://localhost:7687"
if not uri: uri = input(f" Neo4j URI [{uri_default}]: ").strip() or uri_default
uri = input("Neo4j URI [bolt://localhost:7687]: ").strip()
if not uri:
uri = "bolt://localhost:7687"
# Username # Username
user = args.user or os.environ.get("NEO4J_USER") user_default = args.user or os.environ.get("NEO4J_USER") or "neo4j"
if not user: user = input(f" Neo4j username [{user_default}]: ").strip() or user_default
user = input("Neo4j username [neo4j]: ").strip()
if not user:
user = "neo4j"
# Password (never from CLI for security) # Password (always via getpass, never echoed)
password = os.environ.get("NEO4J_PASSWORD") env_password = os.environ.get("NEO4J_PASSWORD")
if not password: if env_password:
password = getpass.getpass("Neo4j password: ") prompt = " Neo4j password [from $NEO4J_PASSWORD, Enter to accept]: "
else:
prompt = " Neo4j password: "
password = getpass.getpass(prompt) or env_password or ""
if not password: if not password:
logger.error("Password is required") logger.error("Password is required")
sys.exit(1) sys.exit(1)
# Summary + confirm
print()
print("" * 60)
print(" Connection summary")
print("" * 60)
print(f" URI: {uri}")
print(f" User: {user}")
print(f" Password: {_mask_password(password)}")
print("" * 60)
confirm = input("Proceed with these credentials? [Y/n]: ").strip().lower()
if confirm and confirm not in ("y", "yes"):
logger.info("Cancelled by user. No changes made.")
sys.exit(0)
return uri, user, password return uri, user, password
@@ -891,10 +1024,66 @@ Schema Reference:
return parser.parse_args() return parser.parse_args()
def prompt_action(args):
    """
    Decide which action to run, asking the user only when no flag decides.

    Args:
        args: Parsed CLI arguments; reads ``args.test_only`` and
            ``args.skip_samples``.

    Returns:
        One of "full", "schema_only", "tests_only", "quit".

    A CLI flag pre-selects the action and skips the menu (``test_only``
    wins over ``skip_samples`` when both are set), which keeps
    non-interactive runs working. Otherwise the menu is printed and the
    prompt repeats until a valid choice is entered. End-of-input on stdin
    (Ctrl-D, or a closed/piped stdin) is treated as "quit" rather than
    being allowed to surface as an unhandled EOFError.
    """
    # CLI flags take precedence over the interactive menu.
    if args.test_only:
        return "tests_only"
    if args.skip_samples:
        return "schema_only"

    # Visible separator line. The previous `"" * 60` always evaluated to
    # the empty string, so the menu rules rendered as blank lines.
    rule = "=" * 60

    print()
    print(rule)
    print(" What would you like to do?")
    print(rule)
    print(" 1) Full init with sample data")
    print(" constraints + indexes + sample nodes + verification tests")
    print(" 2) Schema only (no sample data)")
    print(" constraints + indexes + verification tests")
    print(" 3) Tests only (read-only, no writes)")
    print(" runs connection + APOC + basic functional checks")
    print(" 4) Quit")
    print()
    print(" All writes use MERGE + IF NOT EXISTS — running options 1 or 2")
    print(" against an already-initialized database is safe and idempotent.")
    print(rule)

    # Accepted answers (compared case-insensitively) and the action each
    # one selects.
    actions = {
        "1": "full",
        "2": "schema_only",
        "3": "tests_only",
        "4": "quit",
        "q": "quit",
        "quit": "quit",
        "exit": "quit",
    }

    while True:
        try:
            choice = input("Choice [1-4]: ").strip().lower()
        except EOFError:
            # No more input will ever arrive; quitting is the only safe
            # answer because it performs no writes.
            print()
            return "quit"

        action = actions.get(choice)
        if action is not None:
            return action
        print(" Please enter 1, 2, 3, or 4.")
def main(): def main():
""" """
Main execution function. Main execution function.
Collects credentials via prompts or environment variables.
Flow:
1. Parse CLI args (mostly to allow defaults and automation overrides).
2. Prompt for URI, username, password — each with a default visible —
then show a summary and require [Y/n] confirmation.
3. Open a connection and verify it works.
4. Run a read-only dry-run preview showing what would be created.
5. Present an action menu (full / schema only / tests only / quit).
6. Execute the chosen action.
CLI flags (--test-only, --skip-samples) skip the menu and pre-select an
action so cron-style automation still works.
""" """
args = parse_args() args = parse_args()
@@ -902,7 +1091,7 @@ def main():
if args.quiet: if args.quiet:
logging.getLogger().setLevel(logging.WARNING) logging.getLogger().setLevel(logging.WARNING)
# Get credentials # Get credentials (interactive prompt + summary + confirm)
uri, user, password = get_credentials(args) uri, user, password = get_credentials(args)
logger.info(f"Connecting to Neo4j at {uri}...") logger.info(f"Connecting to Neo4j at {uri}...")
@@ -924,11 +1113,24 @@ def main():
logger.error(f"✗ Cannot connect to Neo4j at {uri}") logger.error(f"✗ Cannot connect to Neo4j at {uri}")
sys.exit(1) sys.exit(1)
if args.test_only: # Dry-run preview: read-only, shows what a full run would create
# Just run basic tests (no schema verification) schema.preview_changes()
# Decide what to do (CLI flags override the interactive menu)
action = prompt_action(args)
if action == "quit":
logger.info("Cancelled by user. No changes made.")
sys.exit(0)
if action == "tests_only":
success = schema.run_tests(include_schema_tests=False) success = schema.run_tests(include_schema_tests=False)
sys.exit(0 if success else 1) sys.exit(0 if success else 1)
# Both "full" and "schema_only" go through the same provisioning
# path; the only difference is whether sample nodes get created.
create_samples = (action == "full")
# Display schema documentation # Display schema documentation
if not args.skip_docs: if not args.skip_docs:
schema.document_schema() schema.document_schema()
@@ -942,9 +1144,11 @@ def main():
schema.create_indexes() schema.create_indexes()
# Create sample nodes to validate schema # Create sample nodes to validate schema
if not args.skip_samples: if create_samples:
logger.info("Creating sample nodes...") logger.info("Creating sample nodes...")
schema.create_sample_nodes() schema.create_sample_nodes()
else:
logger.info("Skipping sample nodes (schema-only run).")
# Run verification tests (including schema tests) # Run verification tests (including schema tests)
logger.info("Verifying schema...") logger.info("Verifying schema...")

View File

@@ -123,22 +123,66 @@ EXPECTED_INDEX_SAMPLES = [
] ]
def _mask_password(pw):
    """
    Mask a password for display in the connection summary.

    Args:
        pw: The password string; may be empty or None.

    Returns:
        "(empty)" for a falsy value. For passwords shorter than 8
        characters, a run of "*" with the same length and nothing else.
        For longer passwords, the first and last character with the
        middle replaced by "*", followed by " (N chars)".
    """
    # Echoing both end characters of a short password discloses most of
    # it (2 of 3 characters for a 3-character password), so the ends are
    # only shown once the password is long enough for that to be a small
    # fraction of the secret.
    min_reveal_len = 8

    if not pw:
        return "(empty)"

    length = len(pw)
    if length < min_reveal_len:
        return "*" * length
    return f"{pw[0]}{'*' * (length - 2)}{pw[-1]} ({length} chars)"
def get_credentials(args): def get_credentials(args):
uri = args.uri or os.environ.get("NEO4J_URI") """
if not uri: Collect Neo4j credentials by prompting for each value sequentially.
uri = input("Neo4j URI [bolt://localhost:7687]: ").strip() or "bolt://localhost:7687"
user = args.user or os.environ.get("NEO4J_USER") For each of URI, username, password: show the current default (from CLI
if not user: arg, env var, or built-in fallback) in brackets; user hits Enter to
user = input("Neo4j username [neo4j]: ").strip() or "neo4j" accept or types a new value to override. Password prompt uses getpass
so it isn't echoed and doesn't land in shell history.
password = os.environ.get("NEO4J_PASSWORD") Finally, print a summary (password masked) and ask for final confirmation.
if not password: If the user declines, exit cleanly without touching the database.
password = getpass.getpass("Neo4j password: ") """
print()
print("" * 60)
print(" Neo4j Connection")
print("" * 60)
# URI
uri_default = args.uri or os.environ.get("NEO4J_URI") or "bolt://localhost:7687"
uri = input(f" Neo4j URI [{uri_default}]: ").strip() or uri_default
# Username
user_default = args.user or os.environ.get("NEO4J_USER") or "neo4j"
user = input(f" Neo4j username [{user_default}]: ").strip() or user_default
# Password (always via getpass, never echoed)
env_password = os.environ.get("NEO4J_PASSWORD")
if env_password:
prompt = " Neo4j password [from $NEO4J_PASSWORD, Enter to accept]: "
else:
prompt = " Neo4j password: "
password = getpass.getpass(prompt) or env_password or ""
if not password: if not password:
print("ERROR: Password is required") print("ERROR: Password is required")
sys.exit(1) sys.exit(1)
# Summary + confirm
print()
print("" * 60)
print(" Connection summary")
print("" * 60)
print(f" URI: {uri}")
print(f" User: {user}")
print(f" Password: {_mask_password(password)}")
print("" * 60)
print(" Validation is read-only — no graph data will be modified.")
confirm = input("Proceed with these credentials? [Y/n]: ").strip().lower()
if confirm and confirm not in ("y", "yes"):
print("Cancelled by user.")
sys.exit(0)
return uri, user, password return uri, user, password

16
utils/pyproject.toml Normal file
View File

@@ -0,0 +1,16 @@
[project]
name = "koios-utils"
version = "0.1.0"
description = "Operator scripts for the Koios unified Neo4j knowledge graph — schema init and validation"
requires-python = ">=3.13"
dependencies = [
"neo4j>=5.19,<6",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
# These are operator scripts, not a library. Don't try to package them.
bypass-selection = true