#cat Emails.csv | sed ':a;N;$!ba;s/\n/ /g' | perl -pe 's/$(echo -en '\u000C')\"/\n\n/g'
# Open the file and replace all newlines with spaces (too much for xargs to handle).
# Found a pattern: the Unicode character \u000C followed by a quote character
# marks the end of each entry. This pattern is then replaced with a newline.
# (That could not be done in sed, because sed (at least this version of it) is
# bad at replacing with a newline, so perl had to be used.)
# Turn the raw email CSV into printable-ASCII text with one entry per line and
# write the result out as 1000-line batch files.
#
# Usage: <script> [INPUT_CSV [OUTPUT_PREFIX]]
#   INPUT_CSV      file to read (default: Emails.csv in the current directory)
#   OUTPUT_PREFIX  prefix of the batch files; `split -d` appends a numeric
#                  suffix to it (default: the batchHillaryEmails path below)
# Needs sed (the scripts use \s and \+, which are GNU extensions), perl,
# dos2unix, tr and split.
input_csv=${1:-Emails.csv}
batch_prefix=${2:-/home/search/Downloads/Datasets/batchHillaryEmails/hillary}

#######################################
# Filter: raw CSV text on stdin -> cleaned text on stdout.
# Stages, in order:
#   1. sed      join the whole input into one line (newline -> space).
#   2. perl     replace each form feed (0x0C) + double quote pair with a
#               newline. The form feed is written as perl's own \x0C escape
#               so the pattern does not depend on the shell's `echo`
#               understanding \u escapes.
#   3. dos2unix convert CRLF line endings to LF.
#   4. sed      blank out JSON-style punctuation and backslash combinations,
#               delete colons, drop whitespace-only lines, turn '*' into a
#               space and squeeze runs of spaces into one.
#   5. perl     delete non-ASCII characters.
#   6. tr       delete every byte except newline and printable ASCII
#               (octal 40-176).
#   7. sed      double every remaining backslash.
# Arguments: none
# Outputs:   cleaned text on stdout
#######################################
clean_emails() {
  sed ':a;N;$!ba;s/\n/ /g' |
    perl -pe 's/\x0C"/\n/g' |
    dos2unix |
    sed -e 's/\],\\/ /g' -e 's/\},/ /g' -e 's/\}\\/ /g' -e 's/{\\/ /g' \
      -e 's/\":/ /g' -e 's/\\\"/ /g' -e 's/\]\\/ /g' -e 's/\[\\/ /g' \
      -e 's/,\\/ /g' -e 's/ \\/ /g' -e 's/\]\"/ /g' -e 's/\]/ /g' \
      -e 's/\[/ /g' -e 's/"/ /g' -e 's/://g' -e 's/{/ /g' -e 's/}/ /g' \
      -e '/^\s*$/d' -e 's/\*/ /g' -e 's/ \+/ /g' |
    perl -pe 's/[^[:ascii:]]//g;' |
    tr -cd '\12\40-\176' |
    sed -e 's/\\/\\\\/g'
}

# Clean the input and cut it into batches of 1000 lines each.
clean_emails < "$input_csv" |
  split -l 1000 -d - "$batch_prefix"
# dos2unix grep "$(echo -en '\u000C')\""
# sed 's/$(echo -en '\u000C')\"/$(echo -e '\n\n')/g'
# gawk '{ sub("$(echo -en '\u000C')\"","\n");}'
#sed 's/$(echo -en '\u000C')\"/
#/g'
#split -l 1000 -d - /home/search/Downloads/Datasets/batchHillaryEmails/hillary