diff --git a/bash/src/bib2web.bash b/bash/src/bib2web.bash index 0606b36..375d17e 100644 --- a/bash/src/bib2web.bash +++ b/bash/src/bib2web.bash @@ -39,6 +39,10 @@ main() { processInputFile checkResultAndAbortIfNeeded "$?" + # Process the entries + processEntries + checkResultAndAbortIfNeeded "$?" + # Clean up the temporary directory cleanUp } diff --git a/bash/src/error-codes.bash b/bash/src/error-codes.bash index a6132bf..3dcbbfe 100644 --- a/bash/src/error-codes.bash +++ b/bash/src/error-codes.bash @@ -2,20 +2,25 @@ # Error codes for tool setup export BIB2WEB_TOOL_FINDER_NOT_FOUND="1" -export BIB2WEB_GREP_NOT_FOUND="2" -export BIB2WEB_AWK_NOT_FOUND="3" -export BIB2WEB_RM_NOT_FOUND="4" -export BIB2WEB_MKTEMP_NOT_FOUND="5" -export BIB2WEB_TOUCH_NOT_FOUND="6" -export BIB2WEB_CAT_NOT_FOUND="7" +export BIB2WEB_BASENAME_NOT_FOUND="2" +export BIB2WEB_GREP_NOT_FOUND="3" +export BIB2WEB_AWK_NOT_FOUND="4" +export BIB2WEB_RM_NOT_FOUND="5" +export BIB2WEB_MKTEMP_NOT_FOUND="6" +export BIB2WEB_TOUCH_NOT_FOUND="7" +export BIB2WEB_CAT_NOT_FOUND="8" +export BIB2WEB_FIND_NOT_FOUND="9" # Error codes for option parsing -export BIB2WEB_MISSING_BIBTEX_FILE="8" -export BIB2WEB_NOT_BIBTEX_FILE="9" +export BIB2WEB_MISSING_BIBTEX_FILE="10" +export BIB2WEB_NOT_BIBTEX_FILE="11" # Error codes for directory and file setup -export BIB2WEB_CANNOT_CREATE_TMP_DIRECTORY="10" -export BIB2WEB_CANNOT_CREATE_LOG_FILE="11" +export BIB2WEB_CANNOT_CREATE_TMP_DIRECTORY="20" +export BIB2WEB_CANNOT_CREATE_LOG_FILE="21" # Error codes for input processing -export BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE="12" +export BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE="30" + +# Error codes for output generation +export BIB2WEB_CANNOT_FIND_ENTRIES="40" diff --git a/bash/src/input-processor.bash b/bash/src/input-processor.bash index 163bd02..3a3b9af 100644 --- a/bash/src/input-processor.bash +++ b/bash/src/input-processor.bash @@ -11,12 +11,12 @@ parseInputFile() { generateEntrySplitterFile() { local splitterFile="$1" - "${BIB2WEB_CAT}" < "${BIB2WEB_TMP_DIR}/${splitterFile}" + "${BIB2WEB_CAT}" < "${splitterFile}" BEGIN { data = "" file = "" } -/^@.*/ { +/^@/ { if (length(data) > 0) { print data > file data = "" @@ -39,7 +39,7 @@ EOF splitEntries() { local splitterFile="$1" local output= - output=$("${BIB2WEB_AWK}" -f "${BIB2WEB_TMP_DIR}/${splitterFile}" "${BIB2WEB_BIBTEX_FILE}" 2>&1) + output=$("${BIB2WEB_AWK}" -f "${splitterFile}" "${BIB2WEB_BIBTEX_FILE}" 2>&1) local result="$?" vvverbose "Splitter result: ${result}" if [ "${result}" -gt "0" ]; then @@ -52,7 +52,7 @@ splitEntries() { processInputFile() { verbose "Processing the input file..." vverbose "Splitting the input BibTeX file into individual entries..." - local splitterFile="splitter.awk" + local splitterFile="${BIB2WEB_TMP_DIR}/splitter.awk" generateEntrySplitterFile "${splitterFile}" splitEntries "${splitterFile}" local splitResult="$?" @@ -60,6 +60,119 @@ processInputFile() { error "Could not process the contents of ${BIB2WEB_BIBTEX_FILE}!" return "${BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE}" fi - vverbose "Input processing completed!" +} + +parseField() { + local field="$1" + local entry="$2" + local entryIntermediateFile="$3" + local parserFile="${BIB2WEB_TMP_DIR}/${field}-input.awk" + if [ ! -e "${parserFile}" ]; then + cat < "${parserFile}" +BEGIN { + FS = " = " +} +tolower(\$1) ~ /${field}$/ { + data = \$2 + # Strip the trailing comma, if exists + sub(/,$/, "", data) + # Quoted data + sub(/^"/, "", data) + sub(/"$/, "", data) + # Data in curly braces + sub(/^{/, "", data) + sub(/}$/, "", data) + # Internal curly braces + gsub(/{/, "", data) + gsub(/}/, "", data) + # Replace tildes with a space + gsub(/~/, " ", data) + # Remove the special characters' leading slash + gsub(/\\\\/, "", data) + print data +} +EOF + fi + local fieldContent + fieldContent=$("${BIB2WEB_AWK}" -f "${parserFile}" "${entry}" 2>&1) + vvverbose "${field}: ${fieldContent}" + echo "${field}: ${fieldContent}" >> "${entryIntermediateFile}" +} + +parseEntry() { + local entry="$1" + local entryFileName + entryFileName=$("${BIB2WEB_BASENAME}" "${entry}") + vverbose "Parsing entry ${entryFileName}..." + local entryIntermediateFile + entryIntermediateFile="${entry}.bib2web" + # Parse the entry type + local type + type=$("${BIB2WEB_AWK}" "/^@(.*){/ { split(\$0, keyrow, \"{\"); print tolower(keyrow[1])}" "${entry}") + vverbose "type: ${type}" + echo "type: ${type}" > "${entryIntermediateFile}" + # Parse the default fields + parseField "address" "${entry}" "${entryIntermediateFile}" + parseField "annote" "${entry}" "${entryIntermediateFile}" + parseField "author" "${entry}" "${entryIntermediateFile}" + parseField "booktitle" "${entry}" "${entryIntermediateFile}" + parseField "chapter" "${entry}" "${entryIntermediateFile}" + parseField "crossref" "${entry}" "${entryIntermediateFile}" + parseField "doi" "${entry}" "${entryIntermediateFile}" + parseField "edition" "${entry}" "${entryIntermediateFile}" + parseField "editor" "${entry}" "${entryIntermediateFile}" + parseField "email" "${entry}" "${entryIntermediateFile}" + parseField "howpublished" "${entry}" "${entryIntermediateFile}" + parseField "institution" "${entry}" "${entryIntermediateFile}" + parseField "journal" "${entry}" "${entryIntermediateFile}" + parseField "key" "${entry}" "${entryIntermediateFile}" + parseField "month" "${entry}" "${entryIntermediateFile}" + parseField "note" "${entry}" "${entryIntermediateFile}" + parseField "number" "${entry}" "${entryIntermediateFile}" + parseField "organization" "${entry}" "${entryIntermediateFile}" + parseField "pages" "${entry}" "${entryIntermediateFile}" + parseField "publisher" "${entry}" "${entryIntermediateFile}" + parseField "school" "${entry}" "${entryIntermediateFile}" + parseField "series" "${entry}" "${entryIntermediateFile}" + parseField "title" "${entry}" "${entryIntermediateFile}" + parseField "type" "${entry}" "${entryIntermediateFile}" + parseField "volume" "${entry}" "${entryIntermediateFile}" + parseField "year" "${entry}" "${entryIntermediateFile}" + return "0" +} + +getEntries() { + "${BIB2WEB_FIND}" "${BIB2WEB_TMP_DIR}" -name "*.bib" +} + +parseEntries() { + for entry in $(getEntries); do + parseEntry "${entry}" + local entryParsingResult="$?" + if [ "${entryParsingResult}" -gt "0" ]; then + error "Error when parsing an entry!" + return "${entryParsingResult}" + break + fi + done + return "0" +} + +processEntries() { + verbose "Processing the entries..." + # Do a sanity check that the entries can be read + local entries + entries=$(getEntries) + local entryFindResult="$?" + if [ "${entryFindResult}" -gt "0" ]; then + error "Could not fetch the entries!" + return "${BIB2WEB_CANNOT_FIND_ENTRIES}" + fi + local entryCount + entryCount=$(echo "${entries}" | grep -c ".bib") + vverbose "Number of entries: ${entryCount}" + parseEntries + verbose "Input processing completed!" verbose "${BIB2WEB_LOG_SEPARATOR}" + return "0" } \ No newline at end of file diff --git a/bash/src/logger.bash b/bash/src/logger.bash index ed1c8ba..b8156e0 100644 --- a/bash/src/logger.bash +++ b/bash/src/logger.bash @@ -53,8 +53,8 @@ verbose() { vverbose() { if [ "${BIB2WEB_VERBOSE}" -gt "2" ]; then local message="$*" - echo "- ${message}" >&1 - echo "- ${message}" >> "${BIB2WEB_LOG_FILE}" + echo "* ${message}" >&1 + echo "* ${message}" >> "${BIB2WEB_LOG_FILE}" fi } diff --git a/bash/src/options-parser.bash b/bash/src/options-parser.bash index d8b4cfe..d842005 100644 --- a/bash/src/options-parser.bash +++ b/bash/src/options-parser.bash @@ -10,7 +10,7 @@ source "${BIB2WEB_BASE_DIR}/error-codes.bash" # Generic version getter getVersion() { local scriptName - scriptName=$(basename "$0") + scriptName=$("${BIB2WEB_BASENAME}" "$0") printf "%s, version %s" "${scriptName}" "${BIB2WEB_VERSION}" } diff --git a/bash/src/parameters.bash.in b/bash/src/parameters.bash.in index 19edf4c..4294d16 100644 --- a/bash/src/parameters.bash.in +++ b/bash/src/parameters.bash.in @@ -4,12 +4,14 @@ export BIB2WEB_VERSION="%BIB2WEB_VERSION%" # Shell tools used by the script +export BIB2WEB_BASENAME export BIB2WEB_GREP export BIB2WEB_AWK export BIB2WEB_MKTEMP export BIB2WEB_RM export BIB2WEB_TOUCH export BIB2WEB_CAT +export BIB2WEB_FIND # Temporary directory and files export BIB2WEB_TMP_DIR diff --git a/bash/src/preprocessor.bash b/bash/src/preprocessor.bash index b71d2b1..df1893c 100644 --- a/bash/src/preprocessor.bash +++ b/bash/src/preprocessor.bash @@ -43,6 +43,13 @@ setUpTools() { printError "No tool finder tool available!" return "${toolFinderResult}" fi + # basename + BIB2WEB_BASENAME=$("${toolFinder}" "basename") + local basenameResult="$?" + if [ "${basenameResult}" -gt 0 ]; then + printError "basename not found!" + return "${BIB2WEB_BASENAME_NOT_FOUND}" + fi # grep BIB2WEB_GREP=$("${toolFinder}" "grep") local grepResult="$?" @@ -85,6 +92,13 @@ setUpTools() { printError "cat not found!" return "${BIB2WEB_CAT_NOT_FOUND}" fi + # find + BIB2WEB_FIND=$("${toolFinder}" "find") + local findResult="$?" + if [ "${findResult}" -gt 0 ]; then + printError "find not found!" + return "${BIB2WEB_FIND_NOT_FOUND}" + fi return 0 } diff --git a/bash/test/input-processor-tests.bash b/bash/test/input-processor-tests.bash index f022f64..b08f1a2 100644 --- a/bash/test/input-processor-tests.bash +++ b/bash/test/input-processor-tests.bash @@ -30,7 +30,7 @@ testSplittingInputFileWithOnlyOneEntryResultsInASingleFile() { assertEquals "1" "${numberOfSplitEntries}" } -testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFile() { +testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFiles() { BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/combined.bib" processInputFile local numberOfSplitEntries @@ -39,4 +39,32 @@ testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFile() { expectedNumberOfEntries=$(grep -c "^@" "${BIB2WEB_BIBTEX_FILE}") assertNotEquals "1" "${numberOfSplitEntries}" assertEquals "${expectedNumberOfEntries}" "${numberOfSplitEntries}" +} + +testParseEntry() { + BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/article/minimum.bib" + processInputFile + local numberOfSplitEntries + numberOfSplitEntries=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib" | grep -c ".bib") + + local entryFile + entryFile=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib") + parseEntry "${entryFile}" + + local numberOfParsedEntries + numberOfParsedEntries=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib2web" | grep -c ".bib2web") + assertEquals "${numberOfSplitEntries}" "${numberOfParsedEntries}" +} + +testParseSeveralEntries() { + BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/combined.bib" + processInputFile + local numberOfSplitEntries + numberOfSplitEntries=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib" | grep -c ".bib") + + parseEntries + + local numberOfParsedEntries + numberOfParsedEntries=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib2web" | grep -c ".bib2web") + assertEquals "${numberOfSplitEntries}" "${numberOfParsedEntries}" } \ No newline at end of file diff --git a/bash/test/preprocessor-tests.bash b/bash/test/preprocessor-tests.bash index 550f4e9..99b18e1 100644 --- a/bash/test/preprocessor-tests.bash +++ b/bash/test/preprocessor-tests.bash @@ -15,6 +15,17 @@ tearDown() { fi } +testToolsSetupSuccess() { + assertNotNull "${BIB2WEB_BASENAME}" + assertNotNull "${BIB2WEB_GREP}" + assertNotNull "${BIB2WEB_AWK}" + assertNotNull "${BIB2WEB_MKTEMP}" + assertNotNull "${BIB2WEB_RM}" + assertNotNull "${BIB2WEB_TOUCH}" + assertNotNull "${BIB2WEB_CAT}" + assertNotNull "${BIB2WEB_FIND}" +} + testSetUpFilesAndCleanUp() { BIB2WEB_LOG_FILE="foo.log" diff --git a/testdata/article/minimum.bib b/testdata/article/minimum.bib index 39ac35a..7d79bfb 100644 --- a/testdata/article/minimum.bib +++ b/testdata/article/minimum.bib @@ -1,6 +1,6 @@ @article{ARTICLE-MINIMUM, - author = "Article Author", - title = "Article Title", + autHOr = "Article Author", + title = {Article Title}, journal = "Article Journal", year = "Article Year", volume = "Article Volume" diff --git a/testdata/combined.bib b/testdata/combined.bib index 724e0a5..aaf138c 100644 --- a/testdata/combined.bib +++ b/testdata/combined.bib @@ -1,7 +1,7 @@ @article{ARTICLE-MINIMUM, author = "Article Author", - title = "Article Title", - journal = "Article Journal", + title = {Article {Title~Foo}}, + journal = "Article\ Journal", year = "Article Year", volume = "Article Volume" }