Breakthrough! Use split+indices for performance

@appinteractive thanks for pointing out `split`. You just saved me some
days of work to refactor the import statements to use CSV instead of
JSON files.

@Tirokk when I enter `:schema` in the Neo4j web UI, I see the following:
```
:schema
Indexes
   ON :Badge(id) ONLINE
   ON :Category(id) ONLINE
   ON :Comment(id) ONLINE
   ON :Post(id) ONLINE
   ON :Tag(id) ONLINE
   ON :User(id) ONLINE

No constraints
```

So I temporarily removed the unique constraints on `slug` and added
plain indices on `id` for all relevant node types. Unfortunately, we
cannot omit the `:Label`; Neo4j does not allow this. So I had to add
indices for all known node labels instead.

With indices the import finishes in:
```
Time elapsed: 351 seconds
```
🎉

@appinteractive when I keep the unique indices on slug, I get an error
during import that a node with label `:User` and slug `tobias` already
exists. I.e., we have unique constraint violations in our production data.

@mattwr18 @ulfgebhardt @ogerly I started the application on my machine
on the production data and it turns out that the index page
http://localhost:3000/ takes way too long. Visiting my profile page at
http://localhost:3000/profile/5b1693daf850c11207fa6109/robert-schafer
is fine, though. Even pagination works. When I visit a post page with
not too many comments, the application is fast enough, too:
http://localhost:3000/post/5bbf49ebc428ea001c7ca89c/neues-video-format-human-connection-tech-news
This commit is contained in:
Robert Schäfer 2019-05-01 12:25:28 +02:00
parent 0fa83188f5
commit 497f77ae10
9 changed files with 21 additions and 12 deletions

View File

@ -9,16 +9,17 @@ echo "MONGODB_DATABASE ${MONGODB_DATABASE}"
echo "MONGODB_AUTH_DB ${MONGODB_AUTH_DB}" echo "MONGODB_AUTH_DB ${MONGODB_AUTH_DB}"
echo "-------------------------------------------------" echo "-------------------------------------------------"
[ -z "$SSH_PRIVATE_KEY" ] || create_private_ssh_key_from_env
rm -rf /tmp/mongo-export/* rm -rf /tmp/mongo-export/*
mkdir -p /tmp/mongo-export mkdir -p /tmp/mongo-export/
ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${SSH_HOST} ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${SSH_HOST}
for collection in "categories" "badges" "users" "contributions" "comments" "follows" "shouts" for collection in "categories" "badges" "users" "contributions" "comments" "follows" "shouts"
do do
mongoexport --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --db ${MONGODB_DATABASE} --collection $collection --out "/tmp/mongo-export/$collection.json" mongoexport --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $collection --collection $collection --out "/tmp/mongo-export/$collection.json"
mkdir -p /tmp/mongo-export/splits/$collection/
split -l 500 /tmp/mongo-export/$collection.json /tmp/mongo-export/splits/$collection/
done done
ssh -S my-ctrl-socket -O check -l ${SSH_USERNAME} ${SSH_HOST} ssh -S my-ctrl-socket -O check -l ${SSH_USERNAME} ${SSH_HOST}

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/badges.json') YIELD value as badge CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as badge
MERGE(b:Badge {id: badge._id["$oid"]}) MERGE(b:Badge {id: badge._id["$oid"]})
ON CREATE SET ON CREATE SET
b.key = badge.key, b.key = badge.key,

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/categories.json') YIELD value as category CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as category
MERGE(c:Category {id: category._id["$oid"]}) MERGE(c:Category {id: category._id["$oid"]})
ON CREATE SET ON CREATE SET
c.name = category.title, c.name = category.title,

View File

@ -1,4 +1,5 @@
CALL apoc.load.json('file:/tmp/mongo-export/comments.json') YIELD value as json CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as json
MERGE (comment:Comment {id: json._id["$oid"]}) MERGE (comment:Comment {id: json._id["$oid"]})
ON CREATE SET ON CREATE SET
comment.content = json.content, comment.content = json.content,

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/contributions.json') YIELD value as post CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as post
MERGE (p:Post {id: post._id["$oid"]}) MERGE (p:Post {id: post._id["$oid"]})
ON CREATE SET ON CREATE SET
p.title = post.title, p.title = post.title,

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/follows.json') YIELD value as follow CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as follow
MATCH (u1:User {id: follow.userId}), (u2:User {id: follow.foreignId}) MATCH (u1:User {id: follow.userId}), (u2:User {id: follow.foreignId})
MERGE (u1)-[:FOLLOWS]->(u2) MERGE (u1)-[:FOLLOWS]->(u2)
; ;

View File

@ -2,8 +2,15 @@
set -e set -e
SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r;" | cypher-shell -a $NEO4J_URI echo "MATCH (n) DETACH DELETE n;" | cypher-shell
SECONDS=0
for collection in "badges" "categories" "users" "follows" "contributions" "shouts" "comments" for collection in "badges" "categories" "users" "follows" "contributions" "shouts" "comments"
do do
echo "Import ${collection}..." && cypher-shell -a $NEO4J_URI < $SCRIPT_DIRECTORY/$collection.cql for chunk in /tmp/mongo-export/splits/$collection/*
do
mv $chunk /tmp/mongo-export/splits/current-chunk.json
echo "Import ${chunk}" && cypher-shell < $SCRIPT_DIRECTORY/$collection.cql
done
done done
echo "Time elapsed: $SECONDS seconds"

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/shouts.json') YIELD value as shout CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as shout
MATCH (u:User {id: shout.userId}), (p:Post {id: shout.foreignId}) MATCH (u:User {id: shout.userId}), (p:Post {id: shout.foreignId})
MERGE (u)-[:SHOUTED]->(p) MERGE (u)-[:SHOUTED]->(p)
; ;

View File

@ -1,4 +1,4 @@
CALL apoc.load.json('file:/tmp/mongo-export/users.json') YIELD value as user CALL apoc.load.json('file:/tmp/mongo-export/splits/current-chunk.json') YIELD value as user
MERGE(u:User {id: user._id["$oid"]}) MERGE(u:User {id: user._id["$oid"]})
ON CREATE SET ON CREATE SET
u.name = user.name, u.name = user.name,