This is the method I use to generate backlinks. It has worked well so far, so I'd like to share it with you. The latest version of the code below can be found here.
The Python script that generates the reference data:
#! /usr/bin/env python
import os
import re
import json
# Directory that is walked for Markdown files. Its length is also used to
# strip this prefix from paths, so it is written with a trailing slash.
rootdir = 'content/'
# When False, heading lines still update the current heading but are not
# scanned for links themselves.
ref_in_heading = True
# Accumulated result: resolved target file -> {'file_ref', 'count', 'link_here'}.
# This is the object that gets dumped to data/refs.json.
refs = {}
# Matches a Markdown link whose URL is a Hugo ref/relref shortcode, in either
# the {{< ... >}} or the {{% ... %}} form. In the tuples returned by
# re.findall, index 0 is the shortcode body, indexes 3/4 hold the quoted/bare
# target of the {{< >}} form and indexes 7/8 those of the {{% %}} form.
ref_pattern = r'\[.*?\]\({{(<\s*(rel)?ref\s+("(.+?)"|(\S+?))\s*>|%\s*(rel)?ref\s+("(.+?)"|(\S+?))\s*%)}}\)'
# Matches a line made of 1-6 '#' characters, whitespace and the heading text,
# optionally followed by a trailing {...} block. Group 2 is the heading text.
heading_pattern = r'^(#{1,6})\s+(.*?)(\s*{.*})?$'
# Path suffixes that are stripped to turn a file path into a page path:
# '.md', '/index.md' and '/_index.md' at the end of the string.
ext_pattern = r'(\.md|/index\.md|/_index\.md)$'
def _first_matching_file(top, pattern):
    """Return the first file path under ``top`` that matches ``pattern``.

    ``pattern`` is applied with ``re.search`` to ``os.path.join(subdir, name)``
    for every file yielded by ``os.walk(top)``. Returns ``None`` when no file
    matches.
    """
    matcher = re.compile(pattern)
    for subdir, _, files in os.walk(top):
        for name in files:
            full_path = os.path.join(subdir, name)
            if matcher.search(full_path):
                return full_path
    return None


def ref2pos(ref, reldir):
    """Resolve the target of a ref/relref shortcode to ``(file, anchor)``.

    Args:
        ref: The shortcode argument, e.g. ``'Virus#Prion'`` or ``'/series/x.md'``.
        reldir: Directory that is searched first for targets that do not
            start with ``'/'``.

    Returns:
        A tuple ``(file, anchor)``:
        - file: path of the matched file with the ``rootdir`` prefix removed,
          like ``'series/Pathogenic biology/Virus/_index.md'``; ``''`` when
          the ref names no file or no file matches.
        - anchor: the text after ``'#'`` in ``ref``, or ``''`` if there is none.
    """
    pieces = ref.split('#')
    # Only a single '#' is treated as a file/anchor separator; any other
    # shape is taken as a file name as a whole.
    target, anchor = pieces if len(pieces) == 2 else (ref, '')
    # remove '.md'
    if target.endswith('.md'):
        target = target[:-3]
    # A bare '#anchor' ref names no file, so there is nothing to search for.
    if not target:
        return ('', anchor)

    is_absolute = target.startswith('/')
    resolved = None
    if not is_absolute:
        # Prefer a match somewhere below the referring file's directory.
        resolved = _first_matching_file(
            reldir, '(' + re.escape(os.sep + target) + ')' + ext_pattern)
    if resolved is None:
        if is_absolute:
            # Matched as a path suffix only, so it is not anchored to rootdir.
            stem = re.escape(target)
        else:
            # The directory part is optional so that files placed directly
            # in rootdir can be found as well.
            stem = (re.escape(rootdir) + '(?:.*' + re.escape(os.sep) + ')?'
                    + re.escape(target))
        resolved = _first_matching_file(rootdir, '(' + stem + ')' + ext_pattern)
    if resolved is None:
        # Unresolved: report an empty file name instead of a slice of the ref.
        return ('', anchor)
    return (resolved[len(rootdir):], anchor)
def get_refs(path):
    """Scan one Markdown file and record its ref/relref links in ``refs``.

    For every link matched by ``ref_pattern`` outside fenced code blocks, the
    target is resolved with ``ref2pos`` and a backlink of the form
    ``'/<page>[#<current heading>]'`` is appended to
    ``refs[target]['link_here'][anchor]``; ``refs[target]['count']`` is
    incremented. A warning is printed when the target file name is empty.

    Args:
        path: Path of the Markdown file; expected to start with ``rootdir``.
    """
    in_code_block = False
    # remove rootdir and ext_pattern in path
    file_from = re.sub(ext_pattern, '', path[len(rootdir):])
    # Directory of this file with a trailing separator; computed once because
    # it is the same for every link in the file.
    parent_dir = os.path.join(os.path.dirname(path), '')
    current_heading = ''  # for empty heading and top
    # Read as UTF-8 (the same encoding the output file is written with) and
    # close the handle when done.
    with open(path, 'r', encoding='utf-8') as source:
        for line in source:
            if re.match(r'```(|[^`].*)$', line):  # starts with ```(xxxx)?
                in_code_block = not in_code_block
            if in_code_block:
                continue
            # try updating current heading
            heading_results = re.findall(heading_pattern, line)
            if heading_results:
                current_heading = heading_results[0][1]
                if not ref_in_heading:
                    continue
            # add to refs
            for ref_result in re.findall(ref_pattern, line):
                target, anchor = ref2pos(
                    ref_result[3] or ref_result[4] or ref_result[7] or ref_result[8],
                    parent_dir)
                if target not in refs:
                    refs[target] = {
                        'file_ref': '/' + re.sub(ext_pattern, '', target),
                        'count': 0,
                        'link_here': {},
                    }
                entry = refs[target]
                if target == '':
                    print('Warning: empty filename:', '"'+ref_result[0]+'"', 'in', path)
                backlink = '/' + file_from + ('#'+current_heading if current_heading != '' else '')
                entry['link_here'].setdefault(anchor, []).append(backlink)
                entry['count'] += 1
if __name__ == '__main__':
    # Collect links from every Markdown file below rootdir. Directories and
    # files are visited in sorted order so the generated lists do not depend
    # on the order in which the filesystem returns entries.
    for subdir, dirs, files in os.walk(rootdir):
        dirs.sort()  # in-place, so os.walk descends in this order
        for file in sorted(files):
            if file.endswith('.md'):
                get_refs(os.path.join(subdir, file))
    # Create the output directory without a separate existence check.
    os.makedirs('data', exist_ok=True)
    with open('data/refs.json', 'w', encoding='utf-8') as f:
        json.dump(refs, f, ensure_ascii=False, indent=4)
Before each deployment, run this script from the root directory of your site. It generates the data file data/refs.json, which looks like this:
{
"series/Pathogenic biology/Virus/_index.md": {
"file_ref": "/series/Pathogenic biology/Virus",
"count": 4, // How many links link to this page
"link_here": {
"": [ // Links that just link to this page
"/series/Pathogenic biology/Chlamydia",
"/series/Pathogenic biology/Virus/Respiratory infection virus#Pathogenicity"
],
"Prion": [ // Links that link to the section "Prion" of this page
"/series/Pathogenic biology#Microorganism"
],
"Recombination and Reassortment": [ // Same...
"/series/Pathogenic biology/Virus/Respiratory infection virus#Biological characteristics"
]
}
},
// ...
}
Now you can use this file to render backlinks in your site.
In your render-heading.html (see Markdown Render Hooks), render the backlinks that point to this heading, i.e. those created by links like [xxx]({{< relref "My awesome post#Section-1" >}}):
{{/* render heading here */}}
{{- /* Backlinks for this heading: look up this page's entry in data/refs.json by file path, then the list stored under this heading's anchor. */ -}}
{{- if and site.Data.refs .Page.File -}}
{{- $anchor := .Anchor -}}
{{- with index site.Data.refs .Page.File.Path -}}
{{- with index .link_here $anchor -}}
<div class="cross-refs">
{{- range $idx, $link := . -}}
{{- if $idx }},{{ end }}<a class='cross-ref' href="{{ relref $.Page $link }}">{{ index (last 1 (split $link "/")) 0 }}</a>
{{- end -}}
</div>
{{- end -}}
{{- end -}}
{{- end -}}
And render the backlinks that point to the page itself rather than to a heading, i.e. those created by links like [xxx]({{< relref "My awesome post" >}}):
{{- /* Page-level backlinks: look up this page's entry in data/refs.json by file path, then the list stored under the empty anchor "". */ -}}
{{- if and .Site.Data.refs .Page.File -}}
{{- with index $.Site.Data.refs .Page.File.Path -}}
{{- with index .link_here "" -}}
<div class='cross-refs top'>
{{- range $idx, $link := . -}}
{{- if $idx }},{{ end }}<a class='cross-ref' href="{{ relref $.Page $link }}">{{ index (last 1 (split $link "/")) 0 }}</a>
{{- end -}}
</div>
{{- end -}}
{{- end -}}
{{- end -}}