Merge pull request #863 from Human-Connection/2019/kw25/improve_import_accuracy

🍰 2019/kw25/improve_import_accuracy
This commit is contained in:
Ulf Gebhardt 2019-06-24 13:03:24 +02:00 committed by GitHub
commit 070bc23e30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 45 additions and 27 deletions

View File

@ -8,11 +8,18 @@ set +o allexport
# Export collection function definition
function export_collection () {
"${EXPORT_MONGOEXPORT_BIN}" --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $1 --collection $1 --out "${EXPORT_PATH}$1.json"
"${EXPORT_MONGOEXPORT_BIN}" --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $1 --out "${EXPORT_PATH}$1.json"
mkdir -p ${EXPORT_PATH}splits/$1/
split -l ${MONGO_EXPORT_SPLIT_SIZE} -a 3 ${EXPORT_PATH}$1.json ${EXPORT_PATH}splits/$1/
}
# Export collection with query function defintion
function export_collection_query () {
"${EXPORT_MONGOEXPORT_BIN}" --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $1 --out "${EXPORT_PATH}$1_$3.json" --query "$2"
mkdir -p ${EXPORT_PATH}splits/$1_$3/
split -l ${MONGO_EXPORT_SPLIT_SIZE} -a 3 ${EXPORT_PATH}$1_$3.json ${EXPORT_PATH}splits/$1_$3/
}
# Delete old export & ensure directory.
# ${EXPORT_PATH:?} aborts the script when EXPORT_PATH is unset or empty, so the
# glob below can never degrade to "rm -rf *" in the current working directory.
rm -rf -- "${EXPORT_PATH:?EXPORT_PATH must be set and non-empty}"*
mkdir -p -- "${EXPORT_PATH}"
@ -24,9 +31,12 @@ ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${
export_collection "badges"
export_collection "categories"
export_collection "comments"
export_collection "contributions"
export_collection_query "contributions" "{'type': 'DELETED'}" "DELETED"
export_collection_query "contributions" "{'type': 'post'}" "post"
export_collection_query "contributions" "{'type': 'cando'}" "cando"
export_collection "emotions"
export_collection "follows"
export_collection_query "follows" "{'foreignService': 'organizations'}" "organizations"
export_collection_query "follows" "{'foreignService': 'users'}" "users"
export_collection "invites"
export_collection "notifications"
export_collection "organizations"

View File

@ -109,8 +109,8 @@
}
}
},
[?] deleted: {
[X] type: Boolean,
[?] deleted: { // This field is not always present in the alpha-data
[?] type: Boolean,
[ ] default: false, // Default value is missing in Nitro
[-] index: true
},
@ -137,7 +137,7 @@ p.contentExcerpt = post.contentExcerpt,
p.visibility = toLower(post.visibility),
p.createdAt = post.createdAt.`$date`,
p.updatedAt = post.updatedAt.`$date`,
p.deleted = post.deleted,
p.deleted = COALESCE(post.deleted,false),
p.disabled = NOT post.isEnabled
WITH p, post
MATCH (u:User {id: post.userId})

View File

@ -9,10 +9,10 @@ set +o allexport
# Delete collection function definition
function delete_collection () {
# Delete from Database
echo "Delete $1"
"${IMPORT_CYPHERSHELL_BIN}" < $(dirname "$0")/$1_delete.cql > /dev/null
echo "Delete $2"
"${IMPORT_CYPHERSHELL_BIN}" < $(dirname "$0")/$1/delete.cql > /dev/null
# Delete index file
rm -f "${IMPORT_PATH}splits/$1.index"
rm -f "${IMPORT_PATH}splits/$2.index"
}
# Import collection function definition
@ -34,7 +34,7 @@ function import_collection () {
# calculate the path of the chunk
export IMPORT_CHUNK_PATH_CQL_FILE="${IMPORT_CHUNK_PATH_CQL}$1/${CHUNK_FILE_NAME}"
# load the neo4j command and replace file variable with actual path
NEO4J_COMMAND="$(envsubst '${IMPORT_CHUNK_PATH_CQL_FILE}' < $(dirname "$0")/$1.cql)"
NEO4J_COMMAND="$(envsubst '${IMPORT_CHUNK_PATH_CQL_FILE}' < $(dirname "$0")/$2)"
# run the import of the chunk
echo "Import $1 ${CHUNK_FILE_NAME} (${chunk})"
echo "${NEO4J_COMMAND}" | "${IMPORT_CYPHERSHELL_BIN}" > /dev/null
@ -52,13 +52,14 @@ SECONDS=0
# Delete all Neo4J Database content
echo "Deleting Database Contents"
delete_collection "badges"
delete_collection "categories"
delete_collection "users"
delete_collection "follows"
delete_collection "contributions"
delete_collection "shouts"
delete_collection "comments"
delete_collection "badges" "badges"
delete_collection "categories" "categories"
delete_collection "users" "users"
delete_collection "follows" "follows_users"
delete_collection "contributions" "contributions_post"
delete_collection "contributions" "contributions_cando"
delete_collection "shouts" "shouts"
delete_collection "comments" "comments"
#delete_collection "emotions"
#delete_collection "invites"
@ -75,26 +76,33 @@ echo "DONE"
# Import Data
echo "Start Importing Data"
import_collection "badges"
import_collection "categories"
import_collection "users"
import_collection "follows"
import_collection "contributions"
import_collection "shouts"
import_collection "comments"
import_collection "badges" "badges/badges.cql"
import_collection "categories" "categories/categories.cql"
import_collection "users" "users/users.cql"
import_collection "follows_users" "follows/follows.cql"
#import_collection "follows_organizations" "follows/follows.cql"
import_collection "contributions_post" "contributions/contributions.cql"
import_collection "contributions_cando" "contributions/contributions.cql"
#import_collection "contributions_DELETED" "contributions/contributions.cql"
import_collection "shouts" "shouts/shouts.cql"
import_collection "comments" "comments/comments.cql"
# import_collection "emotions"
# import_collection "invites"
# import_collection "notifications"
# import_collection "organizations"
# import_collection "pages"
# import_collection "projects"
# import_collection "settings"
# import_collection "status"
# import_collection "systemnotifications"
# import_collection "userscandos"
# import_collection "usersettings"
# contains only dummy data
# import_collection "projects"
# contains only alpha-specific data
# import_collection "status"
# import_collection "settings"
echo "DONE"
echo "Time elapsed: $SECONDS seconds"