Have to do it in the malware dataset
# Convert batch file mB00 into Elasticsearch bulk (NDJSON) format and append
# the result to mBE00. Every input line becomes two output lines,
#   { "index":{} }
#   {"content":"<line> "}
# and one empty line is written after the last document.
# A single gawk pass is enough: prefixing, suffixing and interleaving the
# action lines do not need separate sed/gawk/head stages.
# NOTE(review): the line text is inserted verbatim, so a line containing a
# double quote or a backslash produces invalid JSON -- confirm the dataset
# cannot contain them.
gawk '
  { print "{ \"index\":{} }"; print "{\"content\":\"" $0 " \"}" }
  END { print "" }
' < /home/search/Downloads/Datasets/batchMalware/mB00 \
  >> /home/search/Downloads/Datasets/batchMalwareElasticImport/mBE00
#!/bin/bash
# Convert every batch file in the current directory into Elasticsearch
# bulk (NDJSON) format.
#
# Arg 1: full path to the output folder, ending with "/". It is used as a
#        plain prefix: each result is appended to "<arg 1><input file name>".
# Must be run from the folder (PWD) that holds the batch files to preprocess.
#
# Every input line becomes two output lines,
#   { "index":{} }
#   {"content":"<line> "}
# and one empty line is written after the last document of each file.
#
# NOTE(review): the line text is inserted verbatim, so a line containing a
# double quote or a backslash produces invalid JSON -- confirm the dataset
# cannot contain them.
set -u

# Without an output prefix the results would be appended onto the input
# files themselves, so refuse to run.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s /full/path/to/output/folder/\n' "${0##*/}" >&2
  exit 2
fi

outputPrefix=$1
currDir=$(pwd)
failed=0

# Glob instead of parsing ls, so names with spaces survive intact.
for f in *; do
  # Regular files only; directories are skipped.
  [[ -f "$f" ]] || continue

  fileToBePreprocessed="$currDir/$f"
  outputPath="$outputPrefix$f"

  if ! gawk '
    { print "{ \"index\":{} }"; print "{\"content\":\"" $0 " \"}" }
    END { print "" }
  ' < "$fileToBePreprocessed" >> "$outputPath"; then
    printf 'warning: could not preprocess %s\n' "$fileToBePreprocessed" >&2
    failed=1
  fi
done

exit "$failed"
chmod 755 ElasticBatchPreproccing.sh
Command run to make the bash script executable.
#!/bin/bash
# Convert every batch file in the current directory into Elasticsearch
# bulk (NDJSON) format.
#
# Arg 1: full path to the output folder, ending with "/". It is used as a
#        plain prefix: each result is appended to "<arg 1><input file name>".
# Must be run from the folder (PWD) that holds the batch files to preprocess.
#
# Every input line becomes two output lines,
#   { "index":{} }
#   {"content":"<line> "}
# and one empty line is written after the last document of each file.
#
# NOTE(review): the line text is inserted verbatim, so a line containing a
# double quote or a backslash produces invalid JSON -- confirm the dataset
# cannot contain them.
set -u

# Without an output prefix the results would be appended onto the input
# files themselves, so refuse to run.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s /full/path/to/output/folder/\n' "${0##*/}" >&2
  exit 2
fi

outputPrefix=$1
currDir=$(pwd)
failed=0

# Glob instead of parsing ls, so names with spaces survive intact.
for f in *; do
  # Regular files only; directories are skipped.
  [[ -f "$f" ]] || continue

  fileToBePreprocessed="$currDir/$f"
  outputPath="$outputPrefix$f"

  if ! gawk '
    { print "{ \"index\":{} }"; print "{\"content\":\"" $0 " \"}" }
    END { print "" }
  ' < "$fileToBePreprocessed" >> "$outputPath"; then
    printf 'warning: could not preprocess %s\n' "$fileToBePreprocessed" >&2
    failed=1
  fi
done

exit "$failed"
The bash script above worked to preprocess all batch files in Elastic preprocessing.
# Bulk-index the preprocessed batch file mBE00 into the index
# "malware-dataset-test6" (document type "malware"), time the request and
# show only the first 100 lines of the pretty-printed response.
# The URL is quoted so the shell never treats its "?" as a glob character.
time curl --verbose -s -H "Content-Type: application/x-ndjson" -XPOST \
  "localhost:9200/malware-dataset-test6/malware/_bulk?pretty" \
  --data-binary @/home/search/Downloads/Datasets/batchMalwareElasticImport/mBE00 \
  | head -n 100
#!/bin/bash
# Send every file in the current directory to the Elasticsearch bulk API.
#
# Arg 1: name of the target index (first and only argument).
# Must be run from the folder (PWD) that holds the preprocessed batch files.
# Each file is POSTed as NDJSON to localhost:9200/<index>/malware/_bulk.
set -u

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <index-name>\n' "${0##*/}" >&2
  exit 2
fi

indexname=$1
currDir=$(pwd)
failed=0

# Glob instead of parsing ls, so names with spaces survive intact.
for f in *; do
  # Regular files only; directories are skipped.
  [[ -f "$f" ]] || continue

  batchFile="$currDir/$f"

  # Keep going after a failed request so the remaining batches are still
  # sent; curl without --fail reports only transport-level errors here.
  if ! curl -s -H "Content-Type: application/x-ndjson" -XPOST \
      "localhost:9200/$indexname/malware/_bulk" \
      --data-binary "@$batchFile"; then
    printf 'warning: bulk request failed for %s\n' "$batchFile" >&2
    failed=1
  fi
done

exit "$failed"
chmod 755 ElasticSearchIndexBatch.sh to make it executable
#!/bin/bash
# Send every file in the current directory to the Elasticsearch bulk API.
#
# Arg 1: index name (first and only argument); it has to be in lowercase.
# Must be run from the folder (PWD) that holds the preprocessed batch files.
# Each file is POSTed as NDJSON to localhost:9200/<index>/somedoctype/_bulk.
set -u

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <lowercase-index-name>\n' "${0##*/}" >&2
  exit 2
fi

indexname=$1
currDir=$(pwd)
failed=0

# Glob instead of parsing ls, so names with spaces survive intact.
for f in *; do
  # Regular files only; directories are skipped.
  [[ -f "$f" ]] || continue

  batchFile="$currDir/$f"

  # Keep going after a failed request so the remaining batches are still
  # sent; curl without --fail reports only transport-level errors here.
  if ! curl -s -H "Content-Type: application/x-ndjson" -XPOST \
      "localhost:9200/$indexname/somedoctype/_bulk" \
      --data-binary "@$batchFile"; then
    printf 'warning: bulk request failed for %s\n' "$batchFile" >&2
    failed=1
  fi
done

exit "$failed"
#!/bin/bash |