Thread: eml2mbox.sh
View Single Post
Old 11 Apr 2003, 01:32 AM   #2
Guest
 
Posts: n/a
#!/bin/sh
# eml2mbox
# by thoran@thoran.com

# Date: 20030410
# Version Number:
Version='0/4/4'

# Description: eml2mbox takes a directory of retarded eml/Windows formatted email files (such as those from Fastmail) and writes an mbox (RFC-822) file for use with unix. The conversion may be destructive or non-destructive and the mbox can be placed elsewhere than the eml files directory.

# Features: This does fairly comprehensive error checking: 'Does the source directory exist?', 'Does the destination directory exist?', 'Are there any .eml files in the source directory?', and 'Is there a pre-existing mbox file in the destination directory?'. Additionally it will engage in a dialogue so as to authorise an overwrite of the existing mbox. It is robust enough that if there are no Date or From headers in an email it will be able to continue.

# Discussion: Yes, I am aware that the plurality of the program's name is incorrect. I thought it sounded better wrong. Variable substitution is circumlocutory, however the code is easier to read as the status of a variable (strictly a collection of variables) can be tracked by name changes. Similarly, the checkDirectoryExists function attempts readability, particularly for those not familiar with shell, but with another procedural language. And I know getopts exists, but it doesn't work for me for some reason.

# Acknowledgements: Rob Mueller of Fastmail for the original couple of lines (AKA the non-anally retentive version), a nice reference to RFC-822 formatting (http://www.qmail.org/man/man5/mbox.html), and for not having Fastmail just do this anyway! Thanks also to Era Eriksson <era@iki.fi> for the majority of the sed code which does the extraction of emails from the 'From:' header and for a nice explication thereof.

# Bugs: If a date header does not have a leading 0 for single digit dates (1st, 2nd, ..., 9th of a month), then the From_ separator is not strictly correct at 24 characters. Most MTAs do provide dates with the leading zero. The fix is to check whether that particular (in awkish terms) field is a single character and if so to prefix it with a zero.

# Licence: Umm a licence, a licence, so many to choose. Well I suppose I ought to start with copyright of this any other previous versions which I didn't put this notice on directly, and any future versions in case I start to lose my brain. Then I should move on to copyleft so as to be balanced. I think that says that notices must be left in tact. Well, anyway, every line with a # at the start has to be left in. Any modifications have to be sent to me---which I think is consistent with copyleft, rather than merely publishing as copyleft. Let me know if this is wrong. Either way, I still want revisions...

# The Usual Legal Stuff About Disclaimers Etcetera (AKA The Non-Caveat-Emptor (The Non-Buyer Beware)): While this script may claim to do something, it's a lie, so if you attempt to use it for its stated purpose and it does what you expect then you're doing well. If however, this script for any reason ****s up or ****s something up, or does absolutely nothing for you or anyone or anything else, for any reason, or you are otherwise unsatisfied, upset, annoyed, ****** (off), angry, litigious, vengeful, bummed, or just generally displeased with it and/or me, then it's your fault for using it. Furthermore, it is a precondition of use of this script that you have read it and understand how it works, so as you can determine if it will do what you want, and not merely what my lies about its function might claim. (There is nothing extraordinary about this disclaimer, except for the bit about reading the source maybe. Every software vendor has something very much like the above. They just take a lot more words and still don't make it quite so plain.)

# showVersion: print the program name and its version on stdout, with one
# blank line before and one after.
# Globals: Version (read).
showVersion()
{
    printf '\n eml2mbox %s.\n\n' "$Version"
}

# showHelp: print the usage summary on stdout.
# The text is emitted verbatim from a quoted here-document, so nothing in it
# is expanded by the shell.
showHelp()
{
    cat <<'END_OF_HELP'

eml2mbox converts a directory of eml files to an mbox. Source and
destination directories default to the current directory.

Simple usage:
eml2mbox
eml2mbox -s <path to source directory---the one containing the eml files>
eml2mbox -d <path to destination directory---where the mbox is going to>
eml2mbox -r <remove all .eml files from the source directory>

Usage with all options:
eml2mbox [
 [ -s | --source ]
 [ -d | --destination ]
 [ -r | --remove ]
 |
 [ -? | --help ]
 |
 [ -V | --version ]
 ].

END_OF_HELP
}

# parseParameters: interpret the command-line options.
#
# Arguments: the command-line words, one per argument. When called with no
#            arguments, the words are taken from the global $allParameters
#            instead (split on whitespace, with pathname expansion disabled
#            so that '-?' can never be replaced by a matching file name).
# Sets:      sourceDirectory       - directory given after -s/--source ('.' by default)
#            destinationDirectory  - directory given after -d/--destination ('.' by default)
#            destructiveMode       - 1 if -r/--remove was given, otherwise 0
#            currentOption, currentParameter - parser scratch state
# Exits:     0 after printing help or version;
#            1 on an unknown option, or when -s/-d is the last word and so
#            has no directory following it.
parseParameters()
{
    sourceDirectory='.'
    destinationDirectory='.'
    destructiveMode=0
    currentOption='NULL'

    # Backward-compatible fallback: no argument vector was handed in, so use
    # the flattened copy. Globbing is switched off only for the split, and
    # only if it was not already off.
    if [ $# -eq 0 ] && [ -n "${allParameters:-}" ]; then
        case $- in
            *f*)
                set -- $allParameters
                ;;
            *)
                set -f
                set -- $allParameters
                set +f
                ;;
        esac
    fi

    for currentParameter in "$@"
    do
        case $currentOption in
            'NULL')
                # Not waiting for an option's value: this word must be an option.
                case $currentParameter in
                    '-s' | '--source')
                        currentOption='s'
                        ;;
                    '-d' | '--destination')
                        currentOption='d'
                        ;;
                    '-r' | '--remove')
                        destructiveMode=1
                        ;;
                    '-?' | '--help')
                        showHelp
                        exit 0
                        ;;
                    '-V' | '--version')
                        showVersion
                        exit 0
                        ;;
                    *)
                        echo "Unknown option ($currentParameter). See eml2mbox --help..."
                        echo
                        exit 1
                        ;;
                esac
                ;;
            's')
                # Previous word was -s/--source: this word is its directory.
                sourceDirectory=$currentParameter
                currentOption='NULL'
                ;;
            'd')
                # Previous word was -d/--destination: this word is its directory.
                destinationDirectory=$currentParameter
                currentOption='NULL'
                ;;
        esac
    done

    # A dangling -s or -d used to be ignored silently; report it instead.
    if [ "$currentOption" != 'NULL' ]; then
        echo "Option -$currentOption requires a directory. See eml2mbox --help..."
        echo
        exit 1
    fi
}

# checkDirectoryExists: report whether a path names an existing directory.
#
# Arguments: $1 - the path to check.
# Sets:      directoryExists      - 1 if $1 is a directory, otherwise 0.
#            directoryToBeChecked - copy of $1.
# Returns:   always 0; the answer is in $directoryExists.
checkDirectoryExists()
{
    directoryToBeChecked=${1-}
    # The expansion is quoted on purpose: unquoted, an empty path collapses
    # to `[ -d ]` (which is true) and a path with spaces is a syntax error.
    if [ -d "$directoryToBeChecked" ]; then
        directoryExists=1
    else
        directoryExists=0
    fi
}

# ensureDirectoryPathHasATrailingSlash: turn a directory path into an
# absolute path that ends in exactly one '/'.
#
# Arguments: $1 - path to an existing directory (relative or absolute).
# Sets:      pathToDirectoryWithTrailingSlash - the absolute path with a
#            trailing slash, or the empty string if $1 cannot be entered.
#            pathToPossiblySlashlessDirectory - copy of $1.
# Returns:   0 on success, 1 if $1 is empty or `cd` into it fails.
#
# The directory change happens inside a command substitution (a subshell),
# so the caller's working directory is never touched.
ensureDirectoryPathHasATrailingSlash()
{
    pathToPossiblySlashlessDirectory=${1-}
    pathToDirectoryWithTrailingSlash=''

    if [ -z "$pathToPossiblySlashlessDirectory" ]; then
        return 1
    fi

    # CDPATH is cleared and cd's stdout discarded so that only pwd's output
    # is captured.
    resolvedDirectory=$(CDPATH= cd -- "$pathToPossiblySlashlessDirectory" >/dev/null && pwd) || return 1

    case $resolvedDirectory in
        */)
            # Already slash-terminated (the root directory).
            pathToDirectoryWithTrailingSlash=$resolvedDirectory
            ;;
        *)
            pathToDirectoryWithTrailingSlash=$resolvedDirectory/
            ;;
    esac
}

# verifyParameters: check that the conversion can go ahead.
#
# Globals read:    sourceDirectory, destinationDirectory
# Globals written: parametersOK               - 1 only if every check below passed
#                  sourceDirectoryExists      - 1 if the source directory exists
#                  destinationDirectoryExists - 1 if the destination exists and can be entered
#                  emlFilesFound              - 1 if the source holds at least one *.eml file
#                  mboxOK                     - 1 if there is no mbox in the destination,
#                                               or the user answered 'y' to overwriting it
#                  destinationDirectory       - rewritten as an absolute path with a
#                                               trailing slash when the destination exists
#                  path, dialogueResponse     - scratch
# Input:           reads one line from stdin when an mbox already exists.
# Returns:         0 if parametersOK is 1, non-zero otherwise.
verifyParameters()
{
    # Every flag starts at "failed" so the final test never sees an unset value.
    parametersOK=0
    sourceDirectoryExists=0
    destinationDirectoryExists=0
    emlFilesFound=0
    mboxOK=0

    # '.' is the default and is taken to exist without checking.
    if [ "$sourceDirectory" = '.' ]; then
        sourceDirectoryExists=1
    else
        checkDirectoryExists "$sourceDirectory"
        if [ "$directoryExists" = 1 ]; then
            sourceDirectoryExists=1
        else
            echo 'The source directory does not exist.'
        fi
    fi

    if [ "$destinationDirectory" = '.' ]; then
        destinationDirectoryExists=1
    else
        checkDirectoryExists "$destinationDirectory"
        if [ "$directoryExists" = 1 ]; then
            destinationDirectoryExists=1
        else
            echo 'The destination directory does not exist.'
        fi
    fi

    # Look for .eml files in the source directory itself. An unmatched glob
    # stays as the literal pattern, which the -f test then rejects.
    if [ "$sourceDirectoryExists" = 1 ]; then
        for candidateEmail in "$sourceDirectory"/*.eml
        do
            if [ -f "$candidateEmail" ]; then
                emlFilesFound=1
                break
            fi
        done
        if [ "$emlFilesFound" = 0 ]; then
            echo 'The source directory contains no .eml files.'
        fi
    fi

    if [ "$destinationDirectoryExists" = 1 ]; then
        if ensureDirectoryPathHasATrailingSlash "$destinationDirectory" &&
            [ -n "$pathToDirectoryWithTrailingSlash" ]; then
            destinationDirectory=$pathToDirectoryWithTrailingSlash
        else
            # The directory exists but could not be entered; do not fall
            # back to writing somewhere else.
            destinationDirectoryExists=0
        fi
    fi

    if [ "$destinationDirectoryExists" = 1 ]; then
        path=${destinationDirectory}mbox
        if [ -e "$path" ]; then
            echo "This destination directory already contains an mbox file:"
            echo "$destinationDirectory."
            printf '%s' 'Do you wish to overwrite the mbox file in this directory? (y,n): '
            dialogueResponse=''
            read -r dialogueResponse
            echo
            # Anything other than exactly 'y' (including end-of-file) means no.
            if [ "$dialogueResponse" = 'y' ]; then
                mboxOK=1
            fi
        else
            mboxOK=1
        fi
    fi

    if [ "$sourceDirectoryExists" = 1 ] &&
        [ "$destinationDirectoryExists" = 1 ] &&
        [ "$emlFilesFound" = 1 ] &&
        [ "$mboxOK" = 1 ]; then
        parametersOK=1
    fi

    [ "$parametersOK" = 1 ]
}

# doIt: build the mbox from every .eml file in the source directory.
#
# Globals read:    sourceDirectory, destinationDirectory (expected to end in
#                  '/'; one is appended if missing), destructiveMode
# Globals written: sourceDirectory (rewritten as absolute path with trailing
#                  slash), processID, from, date, fromFound, dateFound, line,
#                  currentEmail and other scratch variables
# Files:           writes <destination>mbox.tmp.<pid>, then renames it to
#                  <destination>mbox; removes the .eml files afterwards when
#                  destructiveMode is 1 and the rename succeeded.
# Returns:         0 on success, non-zero if the mbox could not be written
#                  or installed (the .eml files are then left alone).
#
# For each message a "From <address> <date>" separator is written, followed
# by the message with body lines matching ^>*From  quoted by one more '>',
# followed by one blank line, as the mbox format requires.
# NOTE(review): message lines keep their original line endings (CR LF is not
# converted) -- confirm whether conversion is wanted.
doIt()
{
    processID=$$
    carriageReturn=$(printf '\r')

    ensureDirectoryPathHasATrailingSlash "$sourceDirectory" || return 1
    sourceDirectory=$pathToDirectoryWithTrailingSlash

    case $destinationDirectory in
        '' | */) ;;
        *) destinationDirectory=$destinationDirectory/ ;;
    esac
    temporaryMbox=${destinationDirectory}mbox.tmp.$processID
    finalMbox=${destinationDirectory}mbox

    # Start empty: a stale temporary file with the same PID must not leak
    # old messages into the new mbox.
    : > "$temporaryMbox" || return 1

    for currentEmail in "$sourceDirectory"*.eml
    do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -f "$currentEmail" ] || continue

        # Placeholders used when a message has no usable From:/Date: header.
        from='fake.address@dotbomb.com'
        date='Mon Jan 1 00:00:00 9999'
        fromFound=0
        dateFound=0

        # Scan the header only: stop at the first empty line, at end of file,
        # or as soon as both headers have been seen.
        while IFS= read -r line || [ -n "$line" ]
        do
            # Windows-formatted files end lines with CR LF; drop the CR so the
            # blank separator line is recognised and no CR reaches From_.
            line=${line%"$carriageReturn"}
            if [ -z "$line" ]; then
                break
            fi

            case $line in
                [Ff][Rr][Oo][Mm]:*)
                    # Strip the field name, then (comments), then keep only
                    # the part inside <...> if there is one.
                    extractedAddress=$(printf '%s\n' "$line" |
                        sed -e 's/^[^:]*:[[:space:]]*//' \
                            -e 's/[ ]*([^)]*)[ ]*//g' \
                            -e 's/.*<\([^>]*\)>.*/\1/g' \
                            -e 's/[[:space:]]*$//')
                    if [ -n "$extractedAddress" ]; then
                        from=$extractedAddress
                    fi
                    fromFound=1
                    ;;
                [Dd][Aa][Tt][Ee]:*)
                    # "Date: Thu, 10 Apr 2003 01:32:00 +1000" becomes
                    # "Thu Apr 10 01:32:00 2003".
                    date=$(printf '%s\n' "$line" |
                        sed -e 's/^[^:]*:/Date: /' -e 's/,//' |
                        awk '{printf("%s %s %s %s %s", $2, $4, $3, $6, $5)}')
                    dateFound=1
                    ;;
            esac

            if [ "$fromFound" = 1 ] && [ "$dateFound" = 1 ]; then
                break
            fi
        done < "$currentEmail"

        # awk (rather than cat) quotes From_ look-alikes, guarantees the last
        # line is newline-terminated, and adds the closing blank line.
        {
            printf 'From %s %s\n' "$from" "$date"
            LC_ALL=C awk '/^>*From / { printf ">" } { print } END { print "" }' "$currentEmail"
        } >> "$temporaryMbox" || {
            rm -f -- "$temporaryMbox"
            return 1
        }
    done

    # mv -f replaces an existing mbox in one step. On failure the temporary
    # file is kept (it holds the converted mail) and nothing is deleted.
    mv -f -- "$temporaryMbox" "$finalMbox" || return 1

    if [ "$destructiveMode" = 1 ]; then
        rm -- "$sourceDirectory"*.eml
    fi
}

# main: parse the options, verify them, and run the conversion.
#
# Arguments: the command-line words, forwarded unchanged to parseParameters
#            (which may instead read the global $allParameters).
# Globals:   parametersOK (read; set by verifyParameters).
# Returns:   the status of doIt when the parameters are acceptable;
#            1 after printing 'No changes performed.' otherwise, so that the
#            script's exit status reflects that nothing was done.
main()
{
    parseParameters "$@"
    verifyParameters
    if [ "$parametersOK" = 1 ]; then
        doIt
    else
        echo 'No changes performed.'
        return 1
    fi
}

# Script entry point. $allParameters keeps the space-joined copy of the
# arguments for code that reads it; main is also handed the real argument
# vector so that arguments containing spaces stay intact.
allParameters="$*"
main "$@"
  Reply With Quote