Skip to content

Commit

Permalink
bash: Add initial support for the content parsing of the entries
Browse files Browse the repository at this point in the history
  • Loading branch information
JussiPekonen committed Aug 11, 2020
1 parent 4d5a1e0 commit 1899312
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 24 deletions.
4 changes: 4 additions & 0 deletions bash/src/bib2web.bash
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ main() {
processInputFile
checkResultAndAbortIfNeeded "$?"

# Process the entries
processEntries
checkResultAndAbortIfNeeded "$?"

# Clean up the temporary directory
cleanUp
}
Expand Down
27 changes: 16 additions & 11 deletions bash/src/error-codes.bash
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,25 @@

# Exit/return codes used by bib2web, grouped by the phase that reports them.
# Every code is exported exactly once and every value is unique, so a
# non-zero status identifies a single failure cause. Each group has its own
# numeric range so that new codes can be added without renumbering others.

# Error codes for tool setup
export BIB2WEB_TOOL_FINDER_NOT_FOUND="1"
export BIB2WEB_BASENAME_NOT_FOUND="2"
export BIB2WEB_GREP_NOT_FOUND="3"
export BIB2WEB_AWK_NOT_FOUND="4"
export BIB2WEB_RM_NOT_FOUND="5"
export BIB2WEB_MKTEMP_NOT_FOUND="6"
export BIB2WEB_TOUCH_NOT_FOUND="7"
export BIB2WEB_CAT_NOT_FOUND="8"
export BIB2WEB_FIND_NOT_FOUND="9"

# Error codes for option parsing
export BIB2WEB_MISSING_BIBTEX_FILE="10"
export BIB2WEB_NOT_BIBTEX_FILE="11"

# Error codes for directory and file setup
export BIB2WEB_CANNOT_CREATE_TMP_DIRECTORY="20"
export BIB2WEB_CANNOT_CREATE_LOG_FILE="21"

# Error codes for input processing
export BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE="30"

# Error codes for output generation
export BIB2WEB_CANNOT_FIND_ENTRIES="40"
123 changes: 118 additions & 5 deletions bash/src/input-processor.bash
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ parseInputFile() {

generateEntrySplitterFile() {
local splitterFile="$1"
"${BIB2WEB_CAT}" <<EOF > "${BIB2WEB_TMP_DIR}/${splitterFile}"
"${BIB2WEB_CAT}" <<EOF > "${splitterFile}"
BEGIN {
data = ""
file = ""
}
/^@.*/ {
/^@/ {
if (length(data) > 0) {
print data > file
data = ""
Expand All @@ -39,7 +39,7 @@ EOF
splitEntries() {
local splitterFile="$1"
local output=
output=$("${BIB2WEB_AWK}" -f "${BIB2WEB_TMP_DIR}/${splitterFile}" "${BIB2WEB_BIBTEX_FILE}" 2>&1)
output=$("${BIB2WEB_AWK}" -f "${splitterFile}" "${BIB2WEB_BIBTEX_FILE}" 2>&1)
local result="$?"
vvverbose "Splitter result: ${result}"
if [ "${result}" -gt "0" ]; then
Expand All @@ -52,14 +52,127 @@ splitEntries() {
# Split the input BibTeX file into individual entry files.
#
# Generates the awk splitter script and runs it against the input file.
# Globals:
#   BIB2WEB_TMP_DIR (read)     - directory that receives the splitter script
#   BIB2WEB_BIBTEX_FILE (read) - input file, named in the error message
#   BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE (read)
# Returns:
#   BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE when splitEntries reports a failure.
processInputFile() {
  verbose "Processing the input file..."
  vverbose "Splitting the input BibTeX file into individual entries..."
  # The helpers take the full path of the splitter script, so it is built
  # once here instead of each helper prepending the temporary directory.
  local splitterFile="${BIB2WEB_TMP_DIR}/splitter.awk"
  generateEntrySplitterFile "${splitterFile}"
  splitEntries "${splitterFile}"
  local splitResult="$?"
  if [ "${splitResult}" -gt "0" ]; then
    error "Could not process the contents of ${BIB2WEB_BIBTEX_FILE}!"
    return "${BIB2WEB_CANNOT_PROCESS_BIBTEX_FILE}"
  fi
  vverbose "Input processing completed!"
}

# Extract one field from a single-entry BibTeX file and append the result to
# the entry's intermediate file as a "<field>: <value>" line.
#
# An awk script for the field is generated into BIB2WEB_TMP_DIR on first use
# and reused on later calls for the same field name.
# Arguments:
#   $1 - field name in lower case (e.g. "title"); inserted into the awk pattern
#   $2 - path of the file holding one entry
#   $3 - path of the intermediate file the result line is appended to
# Globals:
#   BIB2WEB_TMP_DIR, BIB2WEB_CAT, BIB2WEB_AWK (all read)
# Notes:
#   A line is always appended, with an empty value when the field is absent.
#   awk's stderr is captured together with its stdout (2>&1), so an awk
#   diagnostic would end up in the stored value.
parseField() {
  local field="$1"
  local entry="$2"
  local entryIntermediateFile="$3"
  local parserFile="${BIB2WEB_TMP_DIR}/${field}-input.awk"
  if [ ! -e "${parserFile}" ]; then
    # Unquoted heredoc: ${field} is expanded by the shell, while \$ and \\
    # are escaped so that awk receives a literal $ and a single backslash.
    "${BIB2WEB_CAT}" <<EOF > "${parserFile}"
BEGIN {
  FS = " = "
}
# The name is anchored on both sides (allowing only surrounding blanks) so
# that e.g. "title" does not also match "booktitle".
tolower(\$1) ~ /^[ \t]*${field}[ \t]*$/ {
  data = \$2
  # Strip the trailing comma, if exists
  sub(/,$/, "", data)
  # Quoted data
  sub(/^"/, "", data)
  sub(/"$/, "", data)
  # Data in curly braces
  sub(/^[{]/, "", data)
  sub(/[}]$/, "", data)
  # Internal curly braces
  gsub(/[{]/, "", data)
  gsub(/[}]/, "", data)
  # Replace tildes with a space
  gsub(/~/, " ", data)
  # Remove the leading backslash of special characters
  gsub(/\\\\/, "", data)
  print data
}
EOF
  fi
  local fieldContent
  fieldContent=$("${BIB2WEB_AWK}" -f "${parserFile}" "${entry}" 2>&1)
  vvverbose "${field}: ${fieldContent}"
  printf '%s\n' "${field}: ${fieldContent}" >> "${entryIntermediateFile}"
}

# Parse a single split entry into its intermediate "<entry>.bib2web" file.
#
# The intermediate file is (re)created with a "type: ..." line taken from the
# "@...{" row of the entry, after which parseField appends one line per
# default field, in the order listed below.
# Arguments:
#   $1 - path of the file holding one entry
# Globals:
#   BIB2WEB_BASENAME, BIB2WEB_AWK (read)
# Returns:
#   Always 0.
parseEntry() {
  local entryPath="$1"
  local intermediatePath="${entryPath}.bib2web"
  local shortName
  shortName=$("${BIB2WEB_BASENAME}" "${entryPath}")
  vverbose "Parsing entry ${shortName}..."

  # Parse the entry type
  local entryType
  entryType=$("${BIB2WEB_AWK}" '/^@(.*){/ { split($0, keyrow, "{"); print tolower(keyrow[1])}' "${entryPath}")
  vverbose "type: ${entryType}"
  echo "type: ${entryType}" > "${intermediatePath}"

  # Parse the default fields
  local defaultFields=(
    address annote author booktitle chapter crossref doi edition editor
    email howpublished institution journal key month note number
    organization pages publisher school series title type volume year
  )
  local fieldName
  for fieldName in "${defaultFields[@]}"; do
    parseField "${fieldName}" "${entryPath}" "${intermediatePath}"
  done
  return "0"
}

# List the split entry files: prints the path of everything below
# BIB2WEB_TMP_DIR whose name matches "*.bib", using the configured find tool.
# The exit status is that of the find invocation.
getEntries() {
  local searchRoot="${BIB2WEB_TMP_DIR}"
  local entryPattern="*.bib"
  "${BIB2WEB_FIND}" "${searchRoot}" -name "${entryPattern}"
}

# Parse every entry file reported by getEntries, stopping at the first
# failure.
#
# The paths are read line by line, so a path containing blanks or glob
# characters reaches parseEntry unchanged. The list is fed through file
# descriptor 3 to keep the loop input separate from stdin.
# Returns:
#   0 when every entry was parsed, otherwise the non-zero status of the
#   first parseEntry call that failed.
parseEntries() {
  local entry
  local entryParsingResult
  while IFS= read -r entry <&3 || [ -n "${entry}" ]; do
    # Skip empty lines so parseEntry never receives an empty path
    [ -n "${entry}" ] || continue
    parseEntry "${entry}"
    entryParsingResult="$?"
    if [ "${entryParsingResult}" -gt "0" ]; then
      error "Error when parsing an entry!"
      return "${entryParsingResult}"
    fi
  done 3< <(getEntries)
  return "0"
}

# Process all split entries: check that they can be listed, log how many
# there are, and parse them.
#
# Globals:
#   BIB2WEB_GREP, BIB2WEB_CANNOT_FIND_ENTRIES, BIB2WEB_LOG_SEPARATOR (read)
# Returns:
#   BIB2WEB_CANNOT_FIND_ENTRIES when getEntries fails, the status of
#   parseEntries when parsing fails, 0 otherwise.
processEntries() {
  verbose "Processing the entries..."
  # Do a sanity check that the entries can be read
  local entries
  entries=$(getEntries)
  local entryFindResult="$?"
  if [ "${entryFindResult}" -gt "0" ]; then
    error "Could not fetch the entries!"
    return "${BIB2WEB_CANNOT_FIND_ENTRIES}"
  fi
  # Count the listed paths that end in ".bib"; the dot is escaped and the
  # pattern anchored so that only the file extension is matched.
  local entryCount
  entryCount=$(printf '%s\n' "${entries}" | "${BIB2WEB_GREP}" -c '\.bib$')
  vverbose "Number of entries: ${entryCount}"
  # Hand a parsing failure to the caller instead of reporting success
  parseEntries
  local entryParsingResult="$?"
  if [ "${entryParsingResult}" -gt "0" ]; then
    return "${entryParsingResult}"
  fi
  verbose "Input processing completed!"
  verbose "${BIB2WEB_LOG_SEPARATOR}"
  return "0"
}
4 changes: 2 additions & 2 deletions bash/src/logger.bash
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ verbose() {
# Emit a message at the "very verbose" level.
#
# When BIB2WEB_VERBOSE is greater than 2, all arguments are joined into one
# message that is written once to stdout and appended once to
# BIB2WEB_LOG_FILE, prefixed with "* ". An unset or empty BIB2WEB_VERBOSE is
# treated as 0, so nothing is printed.
vverbose() {
  if [ "${BIB2WEB_VERBOSE:-0}" -gt "2" ]; then
    local message="$*"
    printf '%s\n' "* ${message}"
    printf '%s\n' "* ${message}" >> "${BIB2WEB_LOG_FILE}"
  fi
}

Expand Down
2 changes: 1 addition & 1 deletion bash/src/options-parser.bash
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ source "${BIB2WEB_BASE_DIR}/error-codes.bash"
# Generic version getter
#
# Prints "<script name>, version <version>" without a trailing newline.
# The script name is the base name of $0, resolved once through the
# configured basename tool.
# Globals:
#   BIB2WEB_BASENAME, BIB2WEB_VERSION (read)
getVersion() {
  local scriptName
  scriptName=$("${BIB2WEB_BASENAME}" "$0")
  printf "%s, version %s" "${scriptName}" "${BIB2WEB_VERSION}"
}

Expand Down
2 changes: 2 additions & 0 deletions bash/src/parameters.bash.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
export BIB2WEB_VERSION="%BIB2WEB_VERSION%"

# Shell tools used by the script. The names are only marked for export here;
# no value is assigned in this block.
export BIB2WEB_BASENAME BIB2WEB_GREP BIB2WEB_AWK BIB2WEB_MKTEMP \
  BIB2WEB_RM BIB2WEB_TOUCH BIB2WEB_CAT BIB2WEB_FIND

# Temporary directory and files
export BIB2WEB_TMP_DIR
Expand Down
14 changes: 14 additions & 0 deletions bash/src/preprocessor.bash
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ setUpTools() {
printError "No tool finder tool available!"
return "${toolFinderResult}"
fi
# basename
BIB2WEB_BASENAME=$("${toolFinder}" "basename")
local basenameResult="$?"
if [ "${basenameResult}" -gt 0 ]; then
printError "basename not found!"
return "${BIB2WEB_BASENAME_NOT_FOUND}"
fi
# grep
BIB2WEB_GREP=$("${toolFinder}" "grep")
local grepResult="$?"
Expand Down Expand Up @@ -85,6 +92,13 @@ setUpTools() {
printError "cat not found!"
return "${BIB2WEB_CAT_NOT_FOUND}"
fi
# find
BIB2WEB_FIND=$("${toolFinder}" "find")
local findResult="$?"
if [ "${findResult}" -gt 0 ]; then
printError "find not found!"
return "${BIB2WEB_FIND_NOT_FOUND}"
fi
return 0
}

Expand Down
30 changes: 29 additions & 1 deletion bash/test/input-processor-tests.bash
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ testSplittingInputFileWithOnlyOneEntryResultsInASingleFile() {
assertEquals "1" "${numberOfSplitEntries}"
}

testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFile() {
testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFiles() {
BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/combined.bib"
processInputFile
local numberOfSplitEntries
Expand All @@ -39,4 +39,32 @@ testSplittingInputFileWithOnlyMoreThanOneEntryResultsInMultipleFile() {
expectedNumberOfEntries=$(grep -c "^@" "${BIB2WEB_BIBTEX_FILE}")
assertNotEquals "1" "${numberOfSplitEntries}"
assertEquals "${expectedNumberOfEntries}" "${numberOfSplitEntries}"
}

# Parsing the only entry of the minimal article file must produce exactly one
# intermediate ".bib2web" file for each split ".bib" file.
testParseEntry() {
  local splitCount
  local parsedCount
  local singleEntry

  BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/article/minimum.bib"
  processInputFile

  # Count the split entries before parsing, then parse the entry found
  splitCount=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib" | grep -c ".bib")
  singleEntry=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib")
  parseEntry "${singleEntry}"

  parsedCount=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib2web" | grep -c ".bib2web")
  assertEquals "${splitCount}" "${parsedCount}"
}

# Parsing all entries of the combined file must produce one intermediate
# ".bib2web" file for each split ".bib" file.
testParseSeveralEntries() {
  local splitCount
  local parsedCount

  BIB2WEB_BIBTEX_FILE="${SOURCE_DIRECTORY}/../../testdata/combined.bib"
  processInputFile

  # Count the split entries before parsing them all
  splitCount=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib" | grep -c ".bib")
  parseEntries

  parsedCount=$(find "${BIB2WEB_TMP_DIR}" -name "*.bib2web" | grep -c ".bib2web")
  assertEquals "${splitCount}" "${parsedCount}"
}
11 changes: 11 additions & 0 deletions bash/test/preprocessor-tests.bash
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ tearDown() {
fi
}

# Every tool variable must hold a non-empty value; the variables are checked
# one by one, in the order listed.
testToolsSetupSuccess() {
  local toolVariable
  for toolVariable in \
    BIB2WEB_BASENAME BIB2WEB_GREP BIB2WEB_AWK BIB2WEB_MKTEMP \
    BIB2WEB_RM BIB2WEB_TOUCH BIB2WEB_CAT BIB2WEB_FIND; do
    # Indirect expansion yields the value of the variable named by the loop
    assertNotNull "${!toolVariable}"
  done
}

testSetUpFilesAndCleanUp() {
BIB2WEB_LOG_FILE="foo.log"

Expand Down
4 changes: 2 additions & 2 deletions testdata/article/minimum.bib
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
@article{ARTICLE-MINIMUM,
author = "Article Author",
title = "Article Title",
autHOr = "Article Author",
title = {Article Title},
journal = "Article Journal",
year = "Article Year",
volume = "Article Volume"
Expand Down
4 changes: 2 additions & 2 deletions testdata/combined.bib
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@article{ARTICLE-MINIMUM,
author = "Article Author",
title = "Article Title",
journal = "Article Journal",
title = {Article {Title~Foo}},
journal = "Article\ Journal",
year = "Article Year",
volume = "Article Volume"
}
Expand Down

0 comments on commit 1899312

Please sign in to comment.