aboutsummaryrefslogtreecommitdiff
path: root/utils/parse-article-content.sh
diff options
context:
space:
mode:
Diffstat (limited to 'utils/parse-article-content.sh')
-rwxr-xr-xutils/parse-article-content.sh62
1 files changed, 62 insertions, 0 deletions
diff --git a/utils/parse-article-content.sh b/utils/parse-article-content.sh
new file mode 100755
index 0000000..40593c5
--- /dev/null
+++ b/utils/parse-article-content.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Skip file metadata: the `tail -n +3` calls below start output at line 3, i.e. they drop the first 2 lines
+
#######################################
# Escape the characters that are significant in HTML so that the text can be
# embedded verbatim inside markup.
# Arguments: $1 - raw text (may contain several lines)
# Outputs:   $1 on stdout with & < > replaced by &amp; &lt; &gt;
#######################################
sanitize-html-entities () {
  # printf instead of echo: bash's echo treats text that is exactly "-n" or
  # "-e" as an option and would not print it.
  # "&" is escaped first so that the "&" of the entities produced by the
  # following expressions is not escaped a second time.
  printf '%s\n' "$1" |
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}
+
#######################################
# Transform the article body so that everything between a line containing
# `<pre role="code"` and its closing `</pre>` line is emitted as escaped
# `<code>…</code>` lines, with marker comments on the opening and closing lines.
# Lines outside such a section are passed through unchanged.
# Globals:   ARTICLE_FILE_CONTENT (read) - raw file text; `tail -n +3` starts
#            at its third line, so the first two lines are dropped
# Arguments: none
# Outputs:   transformed body on stdout, preceded by one empty line (every
#            line is appended after a newline, including the first one)
#######################################
parse-article-content-file () {
  local nested_pre_depth=0   # number of currently open <pre> lines inside the code section
  local inside_code=
  local content=
  local out_line
  local line

  while IFS= read -r line; do
    out_line=$line

    if [[ -z "$inside_code" ]]; then
      if [[ "$line" == *'<pre role="code"'* ]]; then
        inside_code=1
        out_line="$line <!-- code-start -->"
      fi
    else
      # A <pre that appears inside the code section is part of the listing;
      # count it so that its matching </pre> does not end the section.
      if [[ "$line" == *'<pre'* ]]; then
        nested_pre_depth=$((nested_pre_depth + 1))
      fi

      if [[ "$line" != *'</pre>'* ]]; then
        out_line="<code>$(sanitize-html-entities "$line")</code>"
      elif (( nested_pre_depth > 0 )); then
        # Closes a nested <pre>: still listing content.
        nested_pre_depth=$((nested_pre_depth - 1))
        out_line="<code>$(sanitize-html-entities "$line")</code>"
      else
        # Closes the <pre role="code"> section itself; emitted unescaped.
        inside_code=
        out_line="$line<!-- code-end -->"
      fi
    fi

    content="${content}
$out_line"
  # The substitution is quoted: bash releases before 4.4 word-split an
  # unquoted here-string, which would join all lines into a single one.
  done <<< "$(printf '%s\n' "$ARTICLE_FILE_CONTENT" | tail -n +3)"

  printf '%s\n' "$content"
}
+
# Fill ARTICLE_CONTENT from ARTICLE_FILE_CONTENT.
# When DO_NOT_PROCESS_HTML is exactly "true" the text is taken as-is from its
# third line onward; for any other value it goes through
# parse-article-content-file.
case "$DO_NOT_PROCESS_HTML" in
  true)
    ARTICLE_CONTENT=$(echo "$ARTICLE_FILE_CONTENT" | tail -n +3)
    ;;
  *)
    ARTICLE_CONTENT=$(parse-article-content-file)
    ;;
esac