aboutsummaryrefslogtreecommitdiff
path: root/utils/parse-article-content.sh
blob: 40593c51cf08fd760bba751eb40d0e3fd7bfc43c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash

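# The article file starts with metadata lines that are skipped before the
# body is processed.
# NOTE(review): the code below uses `tail -n +3`, which starts output at
# line 3 and therefore drops only the first 2 lines — confirm the intended
# number of metadata lines.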

# Escape the angle brackets of a string so it can be embedded in HTML as text.
#
# Arguments: $1 - the text to escape (may be empty or span several lines)
# Outputs:   $1 on stdout with every '<' replaced by '&lt;' and every '>'
#            replaced by '&gt;', followed by a newline. Ampersands and quotes
#            are left untouched.
sanitize-html-entities () {
  # printf instead of echo: echo would treat a line that is exactly "-n",
  # "-e" or "-E" as an option and print nothing useful for it.
  # '\&' in the sed replacement is a literal ampersand (a bare '&' would mean
  # "the matched text").
  printf '%s\n' "$1" |
  sed -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Turn the article body into HTML in which code blocks are annotated and
# their contents escaped.
#
# Globals:   ARTICLE_FILE_CONTENT (read) - raw article text; processing starts
#            at its third line (`tail -n +3`).
# Arguments: none
# Outputs:   the processed body on stdout. Every processed line is preceded
#            by a newline, so the output starts with an empty line.
#
# Line handling:
#   * Outside a code block, lines are passed through unchanged.
#   * A line containing `<pre role="code"` opens a code block and gets
#     " <!-- code-start -->" appended.
#   * Inside a code block, every line is wrapped in <code>...</code> with its
#     angle brackets escaped by sanitize-html-entities.
#   * A line containing `</pre>` closes the block and gets
#     "<!-- code-end -->" appended, unless a `<pre` was seen inside the block
#     since the last `</pre>`; in that case the `</pre>` is treated as code.
#     This is a flag, not a counter, so only one pending nested `<pre` is
#     tracked at a time.
parse-article-content-file () {
  local IGNORE_NEXT_PRE_TAG=
  local INSIDE_PRE_TAG=
  local CONTENT=
  local TLINE=
  local line

  while IFS= read -r line; do
    if [[ -z "$INSIDE_PRE_TAG" ]]; then
      TLINE=$line
      if [[ "$line" == *'<pre role="code"'* ]]; then
        INSIDE_PRE_TAG=1
        TLINE="$line <!-- code-start -->"
      fi
    else
      # A <pre that appears as code content: its matching </pre> must not
      # terminate the surrounding code block.
      if [[ "$line" == *'<pre'* ]]; then
        IGNORE_NEXT_PRE_TAG=1
      fi

      if [[ "$line" == *'</pre>'* && -z "$IGNORE_NEXT_PRE_TAG" ]]; then
        INSIDE_PRE_TAG=
        TLINE="$line<!-- code-end -->"
      else
        # This </pre> belongs to the nested <pre; consume the flag.
        if [[ "$line" == *'</pre>'* ]]; then
          IGNORE_NEXT_PRE_TAG=
        fi
        TLINE="<code>$(sanitize-html-entities "$line")</code>"
      fi
    fi

    CONTENT+=$'\n'"$TLINE"
  # The command substitution is quoted so that no bash version word-splits it
  # and collapses the article's newlines into spaces.
  done <<< "$(printf '%s\n' "$ARTICLE_FILE_CONTENT" | tail -n +3)"

  printf '%s\n' "$CONTENT"
}

# Populate ARTICLE_CONTENT from ARTICLE_FILE_CONTENT.
# When DO_NOT_PROCESS_HTML is exactly "true" the text (from its third line
# on) is used verbatim; any other value, including unset, sends it through
# parse-article-content-file.
case "$DO_NOT_PROCESS_HTML" in
  true)
    ARTICLE_CONTENT=$(echo "$ARTICLE_FILE_CONTENT" | tail -n +3)
    ;;
  *)
    ARTICLE_CONTENT=$(parse-article-content-file)
    ;;
esac