From 497f77ae10c1ac502cf2715b180ea4944b294746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20Sch=C3=A4fer?= Date: Wed, 1 May 2019 12:25:28 +0200 Subject: [PATCH] Breakthrough! Use split+indices for performance @appinteractive thanks for pointing out `split`. You just saved me some days of work to refactor the import statements to use CSV instead of JSON files. @Tirokk when I enter `:schema` in Neo4J web UI, I see the following: ``` :schema Indexes ON :Badge(id) ONLINE ON :Category(id) ONLINE ON :Comment(id) ONLINE ON :Post(id) ONLINE ON :Tag(id) ONLINE ON :User(id) ONLINE No constraints ``` So I temporarily removed the unique constraints on `slug` and added plain indices on `id` for all relevant node types. We cannot omit the `:Label` unfortunately, neo4j does not allow this. So I had to add all indices for all known node labels instead. With indices the import finishes in: ``` Time elapsed: 351 seconds ``` :tada: @appinteractive when I keep the unique indices on slug, I get an error during import that a node with label `:User` and slug `tobias` already exists. Ie. we have unqiue constraint violations in our production data. @mattwr18 @ulfgebhardt @ogerly I started the application on my machine on the production data and it turns out that the index page http://localhost:3000/ takes way to long. Visiting my profile page at http://localhost:3000/profile/5b1693daf850c11207fa6109/robert-schafer is fine, though. Even pagination works. When I visit a post page with not too many comments, the application is fast enough, too: http://localhost:3000/post/5bbf49ebc428ea001c7ca89c/neues-video-format-human-connection-tech-news --- .../maintenance-worker/migration/mongo/import.sh | 7 ++++--- .../maintenance-worker/migration/neo4j/badges.cql | 2 +- .../maintenance-worker/migration/neo4j/categories.cql | 2 +- .../maintenance-worker/migration/neo4j/comments.cql | 3 ++- .../migration/neo4j/contributions.cql | 2 +- .../maintenance-worker/migration/neo4j/follows.cql | 2 +- .../maintenance-worker/migration/neo4j/import.sh | 11 +++++++++-- .../maintenance-worker/migration/neo4j/shouts.cql | 2 +- .../maintenance-worker/migration/neo4j/users.cql | 2 +- 9 files changed, 21 insertions(+), 12 deletions(-) diff --git a/deployment/legacy-migration/maintenance-worker/migration/mongo/import.sh b/deployment/legacy-migration/maintenance-worker/migration/mongo/import.sh index 328560bfc..8958dd2a8 100755 --- a/deployment/legacy-migration/maintenance-worker/migration/mongo/import.sh +++ b/deployment/legacy-migration/maintenance-worker/migration/mongo/import.sh @@ -9,16 +9,17 @@ echo "MONGODB_DATABASE ${MONGODB_DATABASE}" echo "MONGODB_AUTH_DB ${MONGODB_AUTH_DB}" echo "-------------------------------------------------" -[ -z "$SSH_PRIVATE_KEY" ] || create_private_ssh_key_from_env rm -rf /tmp/mongo-export/* -mkdir -p /tmp/mongo-export +mkdir -p /tmp/mongo-export/ ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${SSH_HOST} for collection in "categories" "badges" "users" "contributions" "comments" "follows" "shouts" do - mongoexport --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --db ${MONGODB_DATABASE} --collection $collection --out "/tmp/mongo-export/$collection.json" + mongoexport --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $collection --collection $collection --out "/tmp/mongo-export/$collection.json" + mkdir -p /tmp/mongo-export/splits/$collection/ + split -l 500 /tmp/mongo-export/$collection.json /tmp/mongo-export/splits/$collection/ done ssh -S my-ctrl-socket -O check -l ${SSH_USERNAME} ${SSH_HOST} diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/badges.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/badges.cql index f4bf67dda..6b6a09592 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/badges.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/badges.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/badges.json') YIELD value as badge +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as badge MERGE(b:Badge {id: badge._id["$oid"]}) ON CREATE SET b.key = badge.key, diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/categories.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/categories.cql index c22354cbe..776811bec 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/categories.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/categories.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/categories.json') YIELD value as category +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as category MERGE(c:Category {id: category._id["$oid"]}) ON CREATE SET c.name = category.title, diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/comments.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/comments.cql index eb645108a..234d29d26 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/comments.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/comments.cql @@ -1,4 +1,5 @@ -CALL apoc.load.json('file:/tmp/mongo-export/comments.json') YIELD value as json +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as json + MERGE (comment:Comment {id: json._id["$oid"]}) ON CREATE SET comment.content = json.content, diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/contributions.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/contributions.cql index 134c276cf..01647f7fb 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/contributions.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/contributions.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/contributions.json') YIELD value as post +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as post MERGE (p:Post {id: post._id["$oid"]}) ON CREATE SET p.title = post.title, diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/follows.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/follows.cql index 6f5416723..0cd6d9cfc 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/follows.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/follows.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/follows.json') YIELD value as follow +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as follow MATCH (u1:User {id: follow.userId}), (u2:User {id: follow.foreignId}) MERGE (u1)-[:FOLLOWS]->(u2) ; diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/import.sh b/deployment/legacy-migration/maintenance-worker/migration/neo4j/import.sh index 6f539c501..ed033c1b9 100755 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/import.sh +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/import.sh @@ -2,8 +2,15 @@ set -e SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -echo "MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r;" | cypher-shell -a $NEO4J_URI +echo "MATCH (n) DETACH DELETE n;" | cypher-shell + +SECONDS=0 for collection in "badges" "categories" "users" "follows" "contributions" "shouts" "comments" do - echo "Import ${collection}..." && cypher-shell -a $NEO4J_URI < $SCRIPT_DIRECTORY/$collection.cql + for chunk in /tmp/mongo-export/splits/$collection/* + do + mv $chunk /tmp/mongo-export/splits/current-chunk.json + echo "Import ${chunk}" && cypher-shell < $SCRIPT_DIRECTORY/$collection.cql + done done +echo "Time elapsed: $SECONDS seconds" diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/shouts.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/shouts.cql index cd72ab66b..5019cdc32 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/shouts.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/shouts.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/shouts.json') YIELD value as shout +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as shout MATCH (u:User {id: shout.userId}), (p:Post {id: shout.foreignId}) MERGE (u)-[:SHOUTED]->(p) ; diff --git a/deployment/legacy-migration/maintenance-worker/migration/neo4j/users.cql b/deployment/legacy-migration/maintenance-worker/migration/neo4j/users.cql index 22eb46882..c877f8377 100644 --- a/deployment/legacy-migration/maintenance-worker/migration/neo4j/users.cql +++ b/deployment/legacy-migration/maintenance-worker/migration/neo4j/users.cql @@ -1,4 +1,4 @@ -CALL apoc.load.json('file:/tmp/mongo-export/users.json') YIELD value as user +CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as user MERGE(u:User {id: user._id["$oid"]}) ON CREATE SET u.name = user.name,