Create db-migration-worker as a docker container

The idea is to import/dump the remote database via SSH, restore it to
the local mongodb, export .json collections to a shared volume and
import the json collections with cypher-shell.
This commit is contained in:
Robert Schäfer 2019-01-16 00:37:21 +01:00
parent 57a6b259eb
commit 0970014a59
8 changed files with 70 additions and 123 deletions

View File

@ -6,6 +6,7 @@
.env
Dockerfile
docker-compose*.yml
./*.png
./*.log
@ -14,3 +15,4 @@ kubernetes/
node_modules/
scripts/
dist/
db-migration-worker/

1
db-migration-worker/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
# Private SSH key that the Dockerfile copies into the image (COPY id_rsa ...).
# It has to live in this directory for the build, but must never be committed.
id_rsa

View File

@ -0,0 +1,9 @@
# Image for the db-migration-worker: a MongoDB image with an SSH client added,
# so that import.sh can reach the remote database over SSH.

# Pinned instead of `latest`: docker-compose starts this container with
# `--smallfiles`, an option that MongoDB removed in 4.2, so a floating tag
# eventually yields an image that cannot start.
FROM mongo:4.0

# Host whose public host key is recorded in known_hosts at build time.
ARG KNOWN_HOST

# update + install in a single layer so a cached, stale package index is never
# reused; the apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get -y install --no-install-recommends openssh-client \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): this bakes a private key into an image layer, where it stays
# readable for anyone who can pull the image. Do not push this image to a
# registry; consider mounting the key at runtime instead.
COPY id_rsa /root/.ssh/id_rsa

# COPY keeps the file mode from the build context, and ssh refuses private keys
# that are readable by group/others, so tighten the permissions explicitly.
RUN chmod 700 /root/.ssh \
    && chmod 600 /root/.ssh/id_rsa \
    && ssh-keyscan -H "$KNOWN_HOST" >> /root/.ssh/known_hosts

COPY import.sh .

32
db-migration-worker/import.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/bash
# Dump the remote MongoDB through an SSH tunnel and restore the dump with
# mongorestore.
#
# Steps:
#   1. verify that every required environment variable is set
#   2. open a background SSH tunnel: local port 27018 -> port 27017 on SSH_HOST
#   3. mongodump the remote database into a gzipped archive
#   4. close the tunnel
#   5. mongorestore the archive that was just written
#
# Required environment:
#   SSH_USERNAME, SSH_HOST,
#   MONGODB_USERNAME, MONGODB_PASSWORD, MONGODB_DATABASE, MONGODB_AUTH_DB,
#   NEO4J_USERNAME, NEO4J_PASSWORD
# Optional environment:
#   OUTPUT_FILE_NAME  archive base name, ".archive" is appended
#                     (default: human-connection-dump)

# Stop at the first failing step: restoring after a failed tunnel or dump
# would import a stale or truncated archive.
set -eo pipefail

CONTROL_SOCKET=my-ctrl-socket

for var in "SSH_USERNAME" "SSH_HOST" "MONGODB_USERNAME" "MONGODB_PASSWORD" "MONGODB_DATABASE" "NEO4J_USERNAME" "NEO4J_PASSWORD" "MONGODB_AUTH_DB"
do
  # ${!var} is indirect expansion: the value of the variable named by $var.
  if [[ -z "${!var}" ]]; then
    echo "${var} is undefined"
    # Exit statuses are limited to 0-255; use a plain non-zero status.
    exit 1
  fi
done

OUTPUT_FILE_NAME=${OUTPUT_FILE_NAME:-human-connection-dump}.archive

# Passwords are masked on purpose: this output ends up in the container logs.
echo "SSH_USERNAME ${SSH_USERNAME}"
echo "SSH_HOST ${SSH_HOST}"
echo "MONGODB_USERNAME ${MONGODB_USERNAME}"
echo "MONGODB_PASSWORD ********"
echo "MONGODB_DATABASE ${MONGODB_DATABASE}"
echo "MONGODB_AUTH_DB ${MONGODB_AUTH_DB}"
echo "NEO4J_USERNAME ${NEO4J_USERNAME}"
echo "NEO4J_PASSWORD ********"
echo "-------------------------------------------------"

# Ask the SSH control master to exit, which tears down the tunnel.
close_tunnel() {
  ssh -S "${CONTROL_SOCKET}" -O exit -l "${SSH_USERNAME}" "${SSH_HOST}" || true
}

# -M/-S: control master on a socket so the tunnel can be closed later;
# -f/-n/-N/-T: go to background, no stdin, no remote command, no tty.
ssh -4 -M -S "${CONTROL_SOCKET}" -fnNT -L 27018:localhost:27017 -l "${SSH_USERNAME}" "${SSH_HOST}"
# From here on the tunnel exists; make sure it is closed even if the dump fails.
trap close_tunnel EXIT

mongodump --host localhost -d "${MONGODB_DATABASE}" --port 27018 --username "${MONGODB_USERNAME}" --password "${MONGODB_PASSWORD}" --authenticationDatabase "${MONGODB_AUTH_DB}" --gzip --archive="${OUTPUT_FILE_NAME}"

# Purely informational: report whether the control master is still running.
ssh -S "${CONTROL_SOCKET}" -O check -l "${SSH_USERNAME}" "${SSH_HOST}" || true
close_tunnel
trap - EXIT

# Restore the archive written above (honours a custom OUTPUT_FILE_NAME).
mongorestore --gzip --archive="${OUTPUT_FILE_NAME}"
# cat ./neo4j_import.cql | /usr/share/neo4j/bin/cypher-shell

View File

@ -11,6 +11,32 @@ services:
- /nitro-backend/node_modules
command: yarn run dev
# neo4j service settings used for the database migration.
neo4j:
volumes:
# Named volume that is also mounted by db-migration-worker, so files exported
# there are visible inside this container under /mongo-export.
- mongo-export:/mongo-export
ports:
# Published to the host on the same port numbers.
- 7687:7687
- 7474:7474
environment:
# NOTE(review): presumably allows APOC procedures to import from files
# (e.g. the exported .json collections) — confirm against the APOC docs.
- NEO4J_apoc_import_file_enabled=true
# Container that dumps the remote MongoDB over SSH and restores it locally
# (see db-migration-worker/import.sh).
db-migration-worker:
build:
context: db-migration-worker
args:
# Build argument read by the Dockerfile, which runs ssh-keyscan against it to
# fill known_hosts; it is set to the same host the script connects to.
- "KNOWN_HOST=${SSH_HOST}"
volumes:
# Same named volume as mounted by the neo4j service.
- mongo-export:/mongo-export
networks:
# NOTE(review): hc-network is not declared in this excerpt — presumably it is
# defined in the base compose file; confirm.
- hc-network
environment:
# These are exactly the variables import.sh requires; it exits early if any of
# them is empty. Values come from the environment docker-compose runs in.
- "SSH_USERNAME=${SSH_USERNAME}"
- "SSH_HOST=${SSH_HOST}"
- "MONGODB_USERNAME=${MONGODB_USERNAME}"
- "MONGODB_PASSWORD=${MONGODB_PASSWORD}"
- "MONGODB_AUTH_DB=${MONGODB_AUTH_DB}"
- "MONGODB_DATABASE=${MONGODB_DATABASE}"
- "NEO4J_USERNAME=${NEO4J_USERNAME}"
- "NEO4J_PASSWORD=${NEO4J_PASSWORD}"
# NOTE(review): presumably passed as arguments to mongod by the base image's
# entrypoint — confirm. --smallfiles is not accepted by MongoDB 4.2 and later.
command: "--smallfiles --logpath=/dev/null"
# Named volume shared between the neo4j and db-migration-worker services.
volumes:
mongo-export:

View File

@ -1,4 +0,0 @@
*
!import-legacy-db.sh
!README.md
!.gitignore

View File

@ -1,82 +0,0 @@
# MongoDB scripts
This README explains how to directly access the production or staging database
for backup or query purposes.
## Backup script
The backup script is intended to be used as a cron job or as a single command from your laptop.
It uses SSH tunneling to a remote host and dumps the mongo database on your machine.
Therefore, a public SSH key needs to be copied to the remote machine.
### Usage
All parameters must be supplied as environment variables:
| Name | required |
|-----------------------|-----------|
| SSH\_USERNAME | yes |
| SSH\_HOST | yes |
| MONGODB\_USERNAME | yes |
| MONGODB\_PASSWORD | yes |
| MONGODB\_DATABASE | yes |
| NEO4J\_USER | yes |
| NEO4J\_PASSWORD | yes |
| OUTPUT | |
| GPG\_PASSWORD | |
If you set `GPG_PASSWORD`, the resulting archive will be encrypted (symmetrically, with the given passphrase).
Encrypting the archive is recommended for data security reasons if you dump the database to your personal laptop.
After exporting these environment variables to your bash, run:
```bash
./import-legacy-db.sh
```
### Import into your local mongo db (optional)
Run (but change the file name accordingly):
```bash
mongorestore --gzip --archive=human-connection-dump_2018-11-21.archive
```
If you previously encrypted your dump, run:
```bash
gpg --decrypt human-connection-dump_2018-11-21.archive.gpg | mongorestore --gzip --archive
```
## Query remote MongoDB
In contrast to the backup script, querying the database is expected to be done
interactively and on demand by the user. Therefore our suggestion is to use a
tool like [MongoDB compass](https://www.mongodb.com/products/compass) to query
the mongo db through an SSH tunnel. This tool can export a collection as .csv
file and you can further do custom processing with a csv tool like
[q](https://github.com/harelba/q).
### Suggested workflow
Read the MongoDB Compass documentation on how to connect to the remote mongo
database [through SSH](https://docs.mongodb.com/compass/master/connect/). You
will need the same credentials and a public SSH key on the server, as for the
backup script above.
Once you have a connection, use the MongoDB Compass
[query bar](https://docs.mongodb.com/compass/master/query-bar/) to query for the
desired data. You can
[export the result](https://docs.mongodb.com/compass/master/import-export/) as
.json or .csv.
Once you have the .csv file on your machine, use standard SQL queries through
the command line tool q to further process the data.
For example
```sh
q "SELECT email FROM ./invites.csv INTERSECT SELECT email FROM ./emails.csv" -H --delimiter=,
```
[Q's website](http://harelba.github.io/q/usage.html) explains the usage fairly
well.

View File

@ -1,37 +0,0 @@
#!/bin/bash
# Dump the remote MongoDB through an SSH tunnel into a dated local archive.
#
# Required environment:
#   SSH_USERNAME, SSH_HOST,
#   MONGODB_USERNAME, MONGODB_PASSWORD, MONGODB_DATABASE,
#   NEO4J_USER, NEO4J_PASSWORD
# Optional environment:
#   OUTPUT        archive base name; "_<ISO date>.archive" is appended
#                 (default: human-connection-dump)
#   GPG_PASSWORD  if set, the dump is symmetrically encrypted with this
#                 passphrase and written to <archive>.gpg

# Stop at the first failing step; pipefail makes a failing mongodump fail the
# whole "mongodump | gpg" pipeline instead of being masked by gpg's status.
set -eo pipefail

CONTROL_SOCKET=my-ctrl-socket

for var in "SSH_USERNAME" "SSH_HOST" "MONGODB_USERNAME" "MONGODB_PASSWORD" "MONGODB_DATABASE" "NEO4J_USER" "NEO4J_PASSWORD"
do
  # ${!var} is indirect expansion: the value of the variable named by $var.
  if [[ -z "${!var}" ]]; then
    echo "${var} is undefined"
    # Exit statuses are limited to 0-255; use a plain non-zero status.
    exit 1
  fi
done

OUTPUT_FILE_NAME=${OUTPUT:-human-connection-dump}_$(date -I).archive

# Secrets are masked on purpose: as a cron job this output lands in log files.
if [[ -n "${GPG_PASSWORD}" ]]; then
  gpg_password_display="********"
else
  gpg_password_display="<none>"
fi

echo "SSH_USERNAME ${SSH_USERNAME}"
echo "SSH_HOST ${SSH_HOST}"
echo "MONGODB_USERNAME ${MONGODB_USERNAME}"
echo "MONGODB_PASSWORD ********"
echo "MONGODB_DATABASE ${MONGODB_DATABASE}"
echo "NEO4J_USER ${NEO4J_USER}"
echo "NEO4J_PASSWORD ********"
echo "OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}"
echo "GPG_PASSWORD ${gpg_password_display}"
echo "-------------------------------------------------"

# Ask the SSH control master to exit, which tears down the tunnel.
close_tunnel() {
  ssh -S "${CONTROL_SOCKET}" -O exit -l "${SSH_USERNAME}" "${SSH_HOST}" || true
}

# -M/-S: control master on a socket so the tunnel can be closed later;
# -f/-n/-N/-T: go to background, no stdin, no remote command, no tty.
ssh -M -S "${CONTROL_SOCKET}" -fnNT -L 27018:localhost:27017 -l "${SSH_USERNAME}" "${SSH_HOST}"
# From here on the tunnel exists; make sure it is closed even if the dump fails.
trap close_tunnel EXIT

# Encrypt only when a passphrase was actually supplied. GPG_PASSWORD is tested
# directly; it is a value, not the name of another variable.
if [[ -n "${GPG_PASSWORD}" ]]; then
  mongodump --host localhost -d "${MONGODB_DATABASE}" --port 27018 --username "${MONGODB_USERNAME}" --password "${MONGODB_PASSWORD}" --authenticationDatabase admin --gzip --archive | gpg -c --batch --passphrase "${GPG_PASSWORD}" --output "${OUTPUT_FILE_NAME}.gpg"
else
  mongodump --host localhost -d "${MONGODB_DATABASE}" --port 27018 --username "${MONGODB_USERNAME}" --password "${MONGODB_PASSWORD}" --authenticationDatabase admin --gzip --archive="${OUTPUT_FILE_NAME}"
fi

# Purely informational: report whether the control master is still running.
ssh -S "${CONTROL_SOCKET}" -O check -l "${SSH_USERNAME}" "${SSH_HOST}" || true
close_tunnel
trap - EXIT