A method to generate backlinks

This is a method I’m using to generate backlinks. It has performed well so far, so I’d like to share it with you. The latest version of the code below can be found here.

The Python script that generates the reference data:

#! /usr/bin/env python

import os
import re
import json
# Directory that is walked for Markdown files. Its length is also used to
# strip this prefix from paths before they are stored or returned.
rootdir = 'content/'
# When False, a heading line only updates the current heading and any
# ref/relref links on that same line are skipped.
ref_in_heading = True
# Accumulated result, keyed by the target file path relative to rootdir.
# Each value holds 'file_ref', 'count' and 'link_here' (anchor -> list of
# source links); this dict is what gets dumped to data/refs.json.
refs = {}
# A Markdown link whose target is a Hugo ref/relref shortcode, written with
# either {{< ... >}} or {{% ... %}} delimiters. The target may be quoted
# (findall groups 3 and 7) or bare (findall groups 4 and 8).
ref_pattern = r'\[.*?\]\({{(<\s*(rel)?ref\s+("(.+?)"|(\S+?))\s*>|%\s*(rel)?ref\s+("(.+?)"|(\S+?))\s*%)}}\)'
# A heading line: 1-6 '#', whitespace, the heading text (group 2), and an
# optional trailing '{...}' block that is excluded from the text.
heading_pattern = r'^(#{1,6})\s+(.*?)(\s*{.*})?$'
# Trailing part of a content path that is dropped to get a page reference:
# '.md', '/index.md' or '/_index.md'.
ext_pattern = r'(\.md|/index\.md|/_index\.md)$'


# Return the first file path under `top` (in os.walk order) that `pattern`
# matches, or None when no file matches.
def _find_first(top, pattern):
    for subdir, _, files in os.walk(top):
        for f in files:
            full_path = os.path.join(subdir, f)
            if pattern.search(full_path):
                return full_path
    return None


# Resolve the target of a ref/relref shortcode to a content file.
#
# ref:    the shortcode argument, e.g. 'Virus', 'Virus.md#Prion' or '/series/Virus'
# reldir: directory of the page that contains the link; relative refs are
#         looked up below it first, then below rootdir
#
# return (file, anchor)
#   - file: file path relative to rootdir that can be used in Hugo, like
#           'series/Pathogenic biology/_index.md'; '' when nothing matches
#   - anchor: the part of ref after '#', '' when ref has no '#'
def ref2pos(ref, reldir):
    # Split on the first '#' only, so the file part never contains a '#'.
    file, _, anchor = ref.partition('#')

    # remove '.md'
    if file.endswith('.md'):
        file = file[:-3]

    # Nothing to look up (e.g. a bare '#anchor'); report it as unresolved.
    if not file:
        return ('', anchor)

    # (directory to walk, regex prefix) pairs, tried in order. Every prefix
    # must be followed by one of the endings in ext_pattern.
    if file.startswith('/'):
        candidates = [
            # Exact path from the content root. The leading '/' is stripped
            # because os.path.join would otherwise discard rootdir.
            (rootdir, '^' + re.escape(os.path.join(rootdir, file.lstrip('/')))),
            # Fallback: the same path as a suffix anywhere below rootdir.
            (rootdir, re.escape(file)),
        ]
    else:
        candidates = [
            # Below the linking page's own directory first.
            (reldir, re.escape(os.sep + file)),
            # Then anywhere below rootdir, including directly inside it.
            (rootdir, '^' + re.escape(rootdir)
                      + '(?:.*' + re.escape(os.sep) + ')?' + re.escape(file)),
        ]

    for top, prefix in candidates:
        hit = _find_first(top, re.compile('(?:' + prefix + ')' + ext_pattern))
        if hit is not None:
            return (hit[len(rootdir):], anchor)

    # Unresolved: return '' rather than a truncated copy of the ref.
    return ('', anchor)


# A fenced-code-block delimiter line: up to three spaces of indent, a run of
# at least three backticks or tildes (group 1), then the rest of the line
# (group 2).
_fence_pattern = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')


def get_refs(path):
    """Collect the ref/relref links of one Markdown file into ``refs``.

    The file is read line by line. Lines inside fenced code blocks are
    ignored. Each ref/relref link found is resolved with ``ref2pos`` and
    recorded under the target file and anchor, together with the linking
    page and the heading most recently seen above the link.

    path: path of the Markdown file, starting with ``rootdir``.
    """
    # remove rootdir and ext_pattern in path
    file_from = re.sub(ext_pattern, '', path[len(rootdir):])
    # Directory of this file with a trailing separator; it is the same for
    # every link in the file, so compute it once.
    parent_dir = os.path.join(os.path.dirname(path), '')
    current_heading = ''  # for empty heading and top
    open_fence = None     # fence run that opened the current code block

    # The result is written as UTF-8, so read the content as UTF-8 as well
    # instead of relying on the locale default.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            fence = _fence_pattern.match(line)
            if fence:
                marker, rest = fence.group(1), fence.group(2)
                if open_fence is None:
                    # A backtick fence cannot have backticks after it on the
                    # same line; such a line is an inline code span.
                    if not (marker[0] == '`' and '`' in rest):
                        open_fence = marker
                        continue
                elif (marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not rest.strip()):
                    # Closing fence: same character, at least as long as the
                    # opening run, nothing else on the line.
                    open_fence = None
                    continue
            if open_fence is not None:
                continue

            # try updating current heading
            heading = re.match(heading_pattern, line)
            if heading:
                current_heading = heading.group(2)
                if not ref_in_heading:
                    continue

            # add to refs
            for ref_result in re.findall(ref_pattern, line):
                target = ref_result[3] or ref_result[4] or ref_result[7] or ref_result[8]
                pos = ref2pos(target, parent_dir)

                # ensure refs[pos[0]]['link_here'][pos[1]] exists
                entry = refs.setdefault(pos[0], {
                    'file_ref': '/' + re.sub(ext_pattern, '', pos[0]),
                    'count': 0,
                    'link_here': {},
                })
                links = entry['link_here'].setdefault(pos[1], [])

                if pos[0] == '':
                    print('Warning: empty filename:', '"'+ref_result[0]+'"', 'in', path)

                links.append('/' + file_from + ('#'+current_heading if current_heading != '' else ''))
                entry['count'] += 1


if __name__ == '__main__':
    # Scan every Markdown file below rootdir; get_refs fills the refs dict.
    for subdir, _, files in os.walk(rootdir):
        for file in files:
            if file.endswith('.md'):
                get_refs(os.path.join(subdir, file))

    # exist_ok replaces the separate existence check, so there is no window
    # between checking for the directory and creating it.
    os.makedirs('data', exist_ok=True)
    # ensure_ascii=False keeps non-ASCII file names and headings readable.
    with open('data/refs.json', 'w', encoding='utf-8') as f:
        json.dump(refs, f, ensure_ascii=False, indent=4)

Run this script from the root directory of your site every time before you deploy. It generates a data file, data/refs.json, which looks like this:

{
    "series/Pathogenic biology/Virus/_index.md": {
        "file_ref": "/series/Pathogenic biology/Virus",
        "count": 4,          // How many links link to this page
        "link_here": {
            "": [                                 // Links that just link to this page
                "/series/Pathogenic biology/Chlamydia",
                "/series/Pathogenic biology/Virus/Respiratory infection virus#Pathogenicity"
            ],
            "Prion": [                            // Links that link to the section "Prion" of this page
                "/series/Pathogenic biology#Microorganism"
            ],
            "Recombination and Reassortment": [   // Same...
                "/series/Pathogenic biology/Virus/Respiratory infection virus#Biological characteristics"
            ]
        }
    },
    // ...
}

Now you can use this file to render backlinks in your site.

In your render-heading.html (see Markdown Render Hooks), render the backlinks that link to this heading, i.e. those created by links like [xxx]({{< relref "My awesome post#Section-1" >}}):


{{/* render heading here */}}

{{- /* Backlinks for this heading. Requires the refs data file and a page that is backed by a file. */ -}}
{{- if and site.Data.refs .Page.File -}}
{{- /* The data file is keyed by the page's File.Path. */ -}}
{{- if index site.Data.refs .Page.File.Path -}}
{{- /* link_here maps an anchor to the list of pages linking to it.
       NOTE(review): the script stores the heading text as the key, so this lookup assumes .Anchor equals that text; confirm against your heading ID settings. */ -}}
{{- if index (index site.Data.refs .Page.File.Path).link_here .Anchor -}}
<div class="cross-refs">
    {{- range $i,$v := index (index site.Data.refs .Page.File.Path).link_here .Anchor -}}
    {{- /* A comma precedes every item except the first; the link text is the last "/"-separated segment of $v. */ -}}
    {{- if ne $i 0 }},{{ end }}<a class='cross-ref' href="{{ relref $.Page $v }}">{{ index (last 1 (split $v "/")) 0 }}</a>
    {{- end -}}
</div>
{{- end -}}
{{- end -}}
{{- end -}}

And render the backlinks that link to the page itself rather than to a heading, i.e. those created by links like [xxx]({{< relref "My awesome post" >}}):


{{- /* Page-level backlinks. Requires the refs data file and a page that is backed by a file. */ -}}
{{- if and .Site.Data.refs .Page.File -}}
{{- /* The data file is keyed by the page's File.Path. */ -}}
{{- if index $.Site.Data.refs .Page.File.Path -}}
{{- /* The empty-string key of link_here holds the links that point at the page without an anchor. */ -}}
{{- if index (index $.Site.Data.refs .Page.File.Path).link_here "" -}}
<div class='cross-refs top'>
    {{- range $i,$v := index (index $.Site.Data.refs .Page.File.Path).link_here "" -}}
    {{- /* A comma precedes every item except the first; the link text is the last "/"-separated segment of $v. */ -}}
    {{- if ne $i 0 }},{{ end }}<a class='cross-ref' href="{{ relref $.Page $v }}">{{ index (last 1 (split $v "/")) 0 }}</a>
    {{- end -}}
</div>
{{- end -}}
{{- end -}}
{{- end -}}
2 Likes

Could you clarify the use of this? I don’t quite get it, or perhaps my understanding of what a “backlink” is differs from yours.

1 Like

See this too

OK, but what are the benefits of that? In SEO, backlinks mean links to or from other websites. Is this more like internal linking, or did I read this wrong?

@Qihuan How about ~~~ in the file contents?

~~~ html
// some code here
~~~