#!/bin/bash
#
# utils/parse-article-content.sh
#
# Provenance (from the patch envelope this script was extracted from):
#   Commit:  381bff129b70471e86e99a2d6b6a7e090f13287e
#   Author:  Jefferson Julio
#   Date:    Sun, 30 May 2021 22:53:32 -0300
#   Subject: Better article parsing, support for article code parsing
#            (add line numbers to code blocks)
#
# Globals read:    ARTICLE_FILE_CONTENT - full text of an article file
#                  DO_NOT_PROCESS_HTML  - "true" disables <pre> processing
# Globals written: ARTICLE_CONTENT      - article body after processing
#
# NOTE(review): the script's only result is the ARTICLE_CONTENT variable, so
# it is presumably meant to be sourced by a caller - confirm.
#
# The leading metadata lines of the article are dropped with `tail -n +3`,
# which starts output at line 3 (i.e. it skips the first TWO lines).
# NOTE(review): the original comment said "first 3 lines"; confirm whether
# the offset or the comment was the intended one.

#######################################
# Escape angle brackets so the text is shown literally by a browser.
# Only '<' and '>' are rewritten; '&' is left untouched.
# NOTE(review): the sed expression was reconstructed - the extracted source
# had lost the angle-bracket text. Confirm against the upstream repository.
# Arguments: $1 - text to escape
# Outputs:   escaped text on stdout
#######################################
sanitize-html-entities () {
  # printf instead of echo: article lines are arbitrary text and could look
  # like an echo option (for example "-n").
  printf '%s\n' "$1" |
    sed -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

#######################################
# Walk the article body line by line and escape '<' and '>' on every line
# that sits inside a <pre> block, so code samples are rendered verbatim.
#
# A line containing "<pre" while already inside a block is treated as
# literal code: it is escaped, and the next "</pre>" is escaped too instead
# of closing the block (one level of nesting is tracked).
#
# NOTE(review): the "<pre" and "</pre>" match patterns were reconstructed;
# the extracted source had lost them. Confirm against upstream.
#
# Globals:   ARTICLE_FILE_CONTENT (read)
# Arguments: none
# Outputs:   processed body on stdout. As in the original, every line is
#            preceded by a newline, so the output starts with an empty line.
#######################################
parse-article-content-file () {
  local ignore_next_pre_tag=
  local inside_pre_tag=
  local content=
  local out_line=
  local line

  while IFS= read -r line; do
    out_line=$line

    if [[ -z "$inside_pre_tag" ]]; then
      if [[ "$line" == *'<pre'* ]]; then
        inside_pre_tag=1
        # NOTE(review): the visible source appends a single space here; a tag
        # that followed it was probably stripped during extraction - confirm.
        out_line="$line "
      fi
    else
      # A nested opening tag is literal code, so its closing tag must not
      # end the surrounding block.
      if [[ "$line" == *'<pre'* ]]; then
        ignore_next_pre_tag=1
      fi

      if [[ "$line" == *'</pre>'* ]]; then
        if [[ -n "$ignore_next_pre_tag" ]]; then
          ignore_next_pre_tag=
          out_line=$(sanitize-html-entities "$line")
        else
          # Real end of the block: emit the closing tag unescaped.
          inside_pre_tag=
        fi
      else
        out_line=$(sanitize-html-entities "$line")
      fi
    fi

    content="${content}
${out_line}"
  # Quoted so that no Bash version word-splits the body and collapses the
  # whitespace inside code blocks.
  done <<< "$(printf '%s\n' "$ARTICLE_FILE_CONTENT" | tail -n +3)"

  printf '%s\n' "$content"
}

if [[ "${DO_NOT_PROCESS_HTML:-}" == "true" ]]; then
  ARTICLE_CONTENT=$(printf '%s\n' "$ARTICLE_FILE_CONTENT" | tail -n +3)
else
  ARTICLE_CONTENT=$(parse-article-content-file)
fi