From 0970014a5949bd1476549329c6bfd4bb8562e3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20Sch=C3=A4fer?= Date: Wed, 16 Jan 2019 00:37:21 +0100 Subject: [PATCH] Create db-migration-worker as a docker container The idea is to import/dump the remote database via SSH, restore it to the local mongodb, export .json collections to a shared volume and import the json collections with cypher-shell. --- .dockerignore | 2 + db-migration-worker/.gitignore | 1 + db-migration-worker/Dockerfile | 9 +++ db-migration-worker/import.sh | 32 ++++++++ docker-compose.override.yml | 26 +++++++ scripts/import-legacy-db/.gitignore | 4 - scripts/import-legacy-db/README.md | 82 -------------------- scripts/import-legacy-db/import-legacy-db.sh | 37 --------- 8 files changed, 70 insertions(+), 123 deletions(-) create mode 100644 db-migration-worker/.gitignore create mode 100644 db-migration-worker/Dockerfile create mode 100755 db-migration-worker/import.sh delete mode 100644 scripts/import-legacy-db/.gitignore delete mode 100644 scripts/import-legacy-db/README.md delete mode 100755 scripts/import-legacy-db/import-legacy-db.sh diff --git a/.dockerignore b/.dockerignore index 84b5adc92..6b6b2193f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,6 +6,7 @@ .env Dockerfile +docker-compose*.yml ./*.png ./*.log @@ -14,3 +15,4 @@ kubernetes/ node_modules/ scripts/ dist/ +db-migration-worker/ diff --git a/db-migration-worker/.gitignore b/db-migration-worker/.gitignore new file mode 100644 index 000000000..690bae050 --- /dev/null +++ b/db-migration-worker/.gitignore @@ -0,0 +1 @@ +id_rsa diff --git a/db-migration-worker/Dockerfile b/db-migration-worker/Dockerfile new file mode 100644 index 000000000..844f65e16 --- /dev/null +++ b/db-migration-worker/Dockerfile @@ -0,0 +1,9 @@ +FROM mongo:latest +ARG KNOWN_HOST + +RUN apt-get update +RUN apt-get -y install openssh-client +COPY id_rsa /root/.ssh/id_rsa +RUN ssh-keyscan -H $KNOWN_HOST >> /root/.ssh/known_hosts +COPY import.sh . + diff --git a/db-migration-worker/import.sh b/db-migration-worker/import.sh new file mode 100755 index 000000000..ccc2cf19b --- /dev/null +++ b/db-migration-worker/import.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +for var in "SSH_USERNAME" "SSH_HOST" "MONGODB_USERNAME" "MONGODB_PASSWORD" "MONGODB_DATABASE" "NEO4J_USERNAME" "NEO4J_PASSWORD" "MONGODB_AUTH_DB" +do + if [[ -z "${!var}" ]]; then + echo "${var} is undefined" + exit -1 + fi +done + +OUTPUT_FILE_NAME=${OUTPUT_FILE_NAME:-human-connection-dump}.archive + +echo "SSH_USERNAME ${SSH_USERNAME}" +echo "SSH_HOST ${SSH_HOST}" +echo "MONGODB_USERNAME ${MONGODB_USERNAME}" +echo "MONGODB_PASSWORD ${MONGODB_PASSWORD}" +echo "MONGODB_DATABASE ${MONGODB_DATABASE}" +echo "MONGODB_AUTH_DB ${MONGODB_AUTH_DB}" +echo "NEO4J_USERNAME ${NEO4J_USERNAME}" +echo "NEO4J_PASSWORD ${NEO4J_PASSWORD}" +echo "-------------------------------------------------" + +ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${SSH_HOST} +mongodump --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --gzip --archive=${OUTPUT_FILE_NAME} +ssh -S my-ctrl-socket -O check -l ${SSH_USERNAME} ${SSH_HOST} +ssh -S my-ctrl-socket -O exit -l ${SSH_USERNAME} ${SSH_HOST} + +mongorestore --gzip --archive=human-connection-dump.archive +# cat ./neo4j_import.cql | /usr/share/neo4j/bin/cypher-shell + + + diff --git a/docker-compose.override.yml b/docker-compose.override.yml index ef7d52c7e..f2b3fa0fc 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -11,6 +11,32 @@ services: - /nitro-backend/node_modules command: yarn run dev neo4j: + volumes: + - mongo-export:/mongo-export ports: - 7687:7687 - 7474:7474 + environment: + - NEO4J_apoc_import_file_enabled=true + db-migration-worker: + build: + context: db-migration-worker + args: + - "KNOWN_HOST=${SSH_HOST}" + volumes: + - mongo-export:/mongo-export + networks: + - hc-network + environment: + - "SSH_USERNAME=${SSH_USERNAME}" + - "SSH_HOST=${SSH_HOST}" + - "MONGODB_USERNAME=${MONGODB_USERNAME}" + - "MONGODB_PASSWORD=${MONGODB_PASSWORD}" + - "MONGODB_AUTH_DB=${MONGODB_AUTH_DB}" + - "MONGODB_DATABASE=${MONGODB_DATABASE}" + - "NEO4J_USERNAME=${NEO4J_USERNAME}" + - "NEO4J_PASSWORD=${NEO4J_PASSWORD}" + command: "--smallfiles --logpath=/dev/null" + +volumes: + mongo-export: diff --git a/scripts/import-legacy-db/.gitignore b/scripts/import-legacy-db/.gitignore deleted file mode 100644 index 82c26eeec..000000000 --- a/scripts/import-legacy-db/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -* -!import-legacy-db.sh -!README.md -!.gitignore diff --git a/scripts/import-legacy-db/README.md b/scripts/import-legacy-db/README.md deleted file mode 100644 index 9ad50d658..000000000 --- a/scripts/import-legacy-db/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# MongoDB scripts - -This README explains how to directly access the production or staging database -for backup or query purposes. - -## Backup script - -The backup script is intended to be used as a cron job or as a single command from your laptop. -It uses SSH tunneling to a remote host and dumps the mongo database on your machine. -Therefore, a public SSH key needs to be copied to the remote machine. - -### Usage - -All parameters must be supplied as environment variables: - -| Name | required | -|-----------------------|-----------| -| SSH\_USERNAME | yes | -| SSH\_HOST | yes | -| MONGODB\_USERNAME | yes | -| MONGODB\_PASSWORD | yes | -| MONGODB\_DATABASE | yes | -| NEO4J\_USER | yes | -| NEO4J\_PASSWORD | yes | -| OUTPUT | | -| GPG\_PASSWORD | | - -If you set `GPG_PASSWORD`, the resulting archive will be encrypted (symmetrically, with the given passphrase). -This is recommended if you dump the database on your personal laptop because of data security. - -After exporting these environment variables to your bash, run: - -```bash -./import-legacy-db.sh -``` - - -### Import into your local mongo db (optional) - -Run (but change the file name accordingly): -```bash -mongorestore --gzip --archive=human-connection-dump_2018-11-21.archive -``` - -If you previously encrypted your dump, run: -```bash -gpg --decrypt human-connection-dump_2018-11-21.archive.gpg | mongorestore --gzip --archive -``` - - -## Query remote MongoDB - -In contrast to the backup script, querying the database is expected to be done -interactively and on demand by the user. Therefore our suggestion is to use a -tool like [MongoDB compass](https://www.mongodb.com/products/compass) to query -the mongo db through an SSH tunnel. This tool can export a collection as .csv -file and you can further do custom processing with a csv tool like -[q](https://github.com/harelba/q). - -### Suggested workflow - -Read on the mongodb compass documentation how to connect to the remote mongo -database [through SSH](https://docs.mongodb.com/compass/master/connect/). You -will need all the credentials and a public SSH key on the server as for the -backup script above. - -Once you have a connection, use the MongoDB Compass -[query bar](https://docs.mongodb.com/compass/master/query-bar/) to query for the -desired data. You can -[export the result](https://docs.mongodb.com/compass/master/import-export/) as -.json or .csv. - -Once you have the .csv file on your machine, use standard SQL queries through -the command line tool q to further process the data. - -For example -```sh -q "SELECT email FROM ./invites.csv INTERSECT SELECT email FROM ./emails.csv" -H --delimiter=, -``` - -[Q's website](http://harelba.github.io/q/usage.html) explains the usage fairly -well. diff --git a/scripts/import-legacy-db/import-legacy-db.sh b/scripts/import-legacy-db/import-legacy-db.sh deleted file mode 100755 index 263ea9a53..000000000 --- a/scripts/import-legacy-db/import-legacy-db.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -for var in "SSH_USERNAME" "SSH_HOST" "MONGODB_USERNAME" "MONGODB_PASSWORD" "MONGODB_DATABASE" "NEO4J_USER" "NEO4J_PASSWORD" -do - if [[ -z "${!var}" ]]; then - echo "${var} is undefined" - exit -1 - fi -done - -OUTPUT_FILE_NAME=${OUTPUT:-human-connection-dump}_$(date -I).archive - -echo "SSH_USERNAME ${SSH_USERNAME}" -echo "SSH_HOST ${SSH_HOST}" -echo "MONGODB_USERNAME ${MONGODB_USERNAME}" -echo "MONGODB_PASSWORD ${MONGODB_PASSWORD}" -echo "MONGODB_DATABASE ${MONGODB_DATABASE}" -echo "NEO4J_USER ${NEO4J_USER}" -echo "NEO4J_PASSWORD ${NEO4J_PASSWORD}" -echo "OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}" -echo "GPG_PASSWORD ${GPG_PASSWORD:-}" -echo "-------------------------------------------------" - -ssh -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${SSH_HOST} - -if [[ -z "${!GPG_PASSWORD}" ]]; then - mongodump --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase admin --gzip --archive | gpg -c --batch --passphrase ${GPG_PASSWORD} --output ${OUTPUT_FILE_NAME}.gpg -else - mongodump --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase admin --gzip --archive=${OUTPUT_FILE_NAME} -fi - - -ssh -S my-ctrl-socket -O check -l ${SSH_USERNAME} ${SSH_HOST} -ssh -S my-ctrl-socket -O exit -l ${SSH_USERNAME} ${SSH_HOST} - - -