Merge pull request #863 from Human-Connection/2019/kw25/improve_import_accuracy

🍰 2019/kw25/improve_import_accuracy
This commit is contained in:
Ulf Gebhardt 2019-06-24 13:03:24 +02:00 committed by GitHub
commit 070bc23e30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 45 additions and 27 deletions

View File

@ -8,11 +8,18 @@ set +o allexport
# Export collection function definition # Export collection function definition
function export_collection () { function export_collection () {
"${EXPORT_MONGOEXPORT_BIN}" --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $1 --collection $1 --out "${EXPORT_PATH}$1.json" "${EXPORT_MONGOEXPORT_BIN}" --db ${MONGODB_DATABASE} --host localhost -d ${MONGODB_DATABASE} --port 27018 --username ${MONGODB_USERNAME} --password ${MONGODB_PASSWORD} --authenticationDatabase ${MONGODB_AUTH_DB} --collection $1 --out "${EXPORT_PATH}$1.json"
mkdir -p ${EXPORT_PATH}splits/$1/ mkdir -p ${EXPORT_PATH}splits/$1/
split -l ${MONGO_EXPORT_SPLIT_SIZE} -a 3 ${EXPORT_PATH}$1.json ${EXPORT_PATH}splits/$1/ split -l ${MONGO_EXPORT_SPLIT_SIZE} -a 3 ${EXPORT_PATH}$1.json ${EXPORT_PATH}splits/$1/
} }
# Export a filtered subset of one collection and split the result into chunks.
#
# Globals (read):
#   EXPORT_MONGOEXPORT_BIN   command used to run the export
#   EXPORT_PATH              output directory prefix (used as-is, so it is
#                            expected to end with a slash)
#   MONGO_EXPORT_SPLIT_SIZE  number of lines per chunk file
#   MONGODB_DATABASE, MONGODB_USERNAME, MONGODB_PASSWORD, MONGODB_AUTH_DB
# Arguments:
#   $1 - collection name
#   $2 - query handed to the export command via --query
#   $3 - suffix; output goes to "<EXPORT_PATH><$1>_<$3>.json" and the chunks
#        to "<EXPORT_PATH>splits/<$1>_<$3>/"
# Returns:
#   0 on success; the failing command's status if the export, the directory
#   creation or the split fails (later steps are skipped in that case).
function export_collection_query () {
  local collection=$1
  local query=$2
  local target="${collection}_$3"
  local json_file="${EXPORT_PATH}${target}.json"
  local split_dir="${EXPORT_PATH}splits/${target}/"

  # The database used to be passed twice (--db and -d); once is enough.
  # All expansions are quoted so values containing whitespace or glob
  # characters reach the tools as single arguments.
  "${EXPORT_MONGOEXPORT_BIN}" \
    --db "${MONGODB_DATABASE}" \
    --host localhost \
    --port 27018 \
    --username "${MONGODB_USERNAME}" \
    --password "${MONGODB_PASSWORD}" \
    --authenticationDatabase "${MONGODB_AUTH_DB}" \
    --collection "${collection}" \
    --out "${json_file}" \
    --query "${query}" || return

  mkdir -p -- "${split_dir}" || return

  # -a 3 gives three-letter chunk suffixes (aaa, aab, ...) inside split_dir.
  split -l "${MONGO_EXPORT_SPLIT_SIZE}" -a 3 -- "${json_file}" "${split_dir}"
}
# Delete old export & ensure directory # Delete old export & ensure directory
rm -rf ${EXPORT_PATH}* rm -rf ${EXPORT_PATH}*
mkdir -p ${EXPORT_PATH} mkdir -p ${EXPORT_PATH}
@ -24,9 +31,12 @@ ssh -4 -M -S my-ctrl-socket -fnNT -L 27018:localhost:27017 -l ${SSH_USERNAME} ${
export_collection "badges" export_collection "badges"
export_collection "categories" export_collection "categories"
export_collection "comments" export_collection "comments"
export_collection "contributions" export_collection_query "contributions" "{'type': 'DELETED'}" "DELETED"
export_collection_query "contributions" "{'type': 'post'}" "post"
export_collection_query "contributions" "{'type': 'cando'}" "cando"
export_collection "emotions" export_collection "emotions"
export_collection "follows" export_collection_query "follows" "{'foreignService': 'organizations'}" "organizations"
export_collection_query "follows" "{'foreignService': 'users'}" "users"
export_collection "invites" export_collection "invites"
export_collection "notifications" export_collection "notifications"
export_collection "organizations" export_collection "organizations"

View File

@ -109,8 +109,8 @@
} }
} }
}, },
[?] deleted: { [?] deleted: { // This field is not always present in the alpha-data
[X] type: Boolean, [?] type: Boolean,
[ ] default: false, // Default value is missing in Nitro [ ] default: false, // Default value is missing in Nitro
[-] index: true [-] index: true
}, },
@ -137,7 +137,7 @@ p.contentExcerpt = post.contentExcerpt,
p.visibility = toLower(post.visibility), p.visibility = toLower(post.visibility),
p.createdAt = post.createdAt.`$date`, p.createdAt = post.createdAt.`$date`,
p.updatedAt = post.updatedAt.`$date`, p.updatedAt = post.updatedAt.`$date`,
p.deleted = post.deleted, p.deleted = COALESCE(post.deleted,false),
p.disabled = NOT post.isEnabled p.disabled = NOT post.isEnabled
WITH p, post WITH p, post
MATCH (u:User {id: post.userId}) MATCH (u:User {id: post.userId})

View File

@ -9,10 +9,10 @@ set +o allexport
# Delete collection function definition # Delete collection function definition
function delete_collection () { function delete_collection () {
# Delete from Database # Delete from Database
echo "Delete $1" echo "Delete $2"
"${IMPORT_CYPHERSHELL_BIN}" < $(dirname "$0")/$1_delete.cql > /dev/null "${IMPORT_CYPHERSHELL_BIN}" < $(dirname "$0")/$1/delete.cql > /dev/null
# Delete index file # Delete index file
rm -f "${IMPORT_PATH}splits/$1.index" rm -f "${IMPORT_PATH}splits/$2.index"
} }
# Import collection function definition # Import collection function definition
@ -34,7 +34,7 @@ function import_collection () {
# calculate the path of the chunk # calculate the path of the chunk
export IMPORT_CHUNK_PATH_CQL_FILE="${IMPORT_CHUNK_PATH_CQL}$1/${CHUNK_FILE_NAME}" export IMPORT_CHUNK_PATH_CQL_FILE="${IMPORT_CHUNK_PATH_CQL}$1/${CHUNK_FILE_NAME}"
# load the neo4j command and replace file variable with actual path # load the neo4j command and replace file variable with actual path
NEO4J_COMMAND="$(envsubst '${IMPORT_CHUNK_PATH_CQL_FILE}' < $(dirname "$0")/$1.cql)" NEO4J_COMMAND="$(envsubst '${IMPORT_CHUNK_PATH_CQL_FILE}' < $(dirname "$0")/$2)"
# run the import of the chunk # run the import of the chunk
echo "Import $1 ${CHUNK_FILE_NAME} (${chunk})" echo "Import $1 ${CHUNK_FILE_NAME} (${chunk})"
echo "${NEO4J_COMMAND}" | "${IMPORT_CYPHERSHELL_BIN}" > /dev/null echo "${NEO4J_COMMAND}" | "${IMPORT_CYPHERSHELL_BIN}" > /dev/null
@ -52,13 +52,14 @@ SECONDS=0
# Delete all Neo4J Database content # Delete all Neo4J Database content
echo "Deleting Database Contents" echo "Deleting Database Contents"
delete_collection "badges" delete_collection "badges" "badges"
delete_collection "categories" delete_collection "categories" "categories"
delete_collection "users" delete_collection "users" "users"
delete_collection "follows" delete_collection "follows" "follows_users"
delete_collection "contributions" delete_collection "contributions" "contributions_post"
delete_collection "shouts" delete_collection "contributions" "contributions_cando"
delete_collection "comments" delete_collection "shouts" "shouts"
delete_collection "comments" "comments"
#delete_collection "emotions" #delete_collection "emotions"
#delete_collection "invites" #delete_collection "invites"
@ -75,26 +76,33 @@ echo "DONE"
# Import Data # Import Data
echo "Start Importing Data" echo "Start Importing Data"
import_collection "badges" import_collection "badges" "badges/badges.cql"
import_collection "categories" import_collection "categories" "categories/categories.cql"
import_collection "users" import_collection "users" "users/users.cql"
import_collection "follows" import_collection "follows_users" "follows/follows.cql"
import_collection "contributions" #import_collection "follows_organizations" "follows/follows.cql"
import_collection "shouts" import_collection "contributions_post" "contributions/contributions.cql"
import_collection "comments" import_collection "contributions_cando" "contributions/contributions.cql"
#import_collection "contributions_DELETED" "contributions/contributions.cql"
import_collection "shouts" "shouts/shouts.cql"
import_collection "comments" "comments/comments.cql"
# import_collection "emotions" # import_collection "emotions"
# import_collection "invites" # import_collection "invites"
# import_collection "notifications" # import_collection "notifications"
# import_collection "organizations" # import_collection "organizations"
# import_collection "pages" # import_collection "pages"
# import_collection "projects"
# import_collection "settings"
# import_collection "status"
# import_collection "systemnotifications" # import_collection "systemnotifications"
# import_collection "userscandos" # import_collection "userscandos"
# import_collection "usersettings" # import_collection "usersettings"
# contains only dummy data
# import_collection "projects"
# contains only alpha-specific data
# import_collection "status"
# import_collection "settings"
echo "DONE" echo "DONE"
echo "Time elapsed: $SECONDS seconds" echo "Time elapsed: $SECONDS seconds"