Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions migrate.cfg.sagetracwikionly
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# unauthenticated works for globally readable trac instances
url: https://trac.sagemath.org/xmlrpc

# Optional ticket_url if links to the Trac tickets should be set in md-documents
ticket_url: https://trac.sagemath.org/ticket
# Should references to tickets still point to Trac?
keep_trac_ticket_references: yes

# authentication broken with python3.8 or later, due to
# https://github.com/python/cpython/issues/82219
Expand Down
222 changes: 170 additions & 52 deletions migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,15 @@
config.read('migrate.cfg')

# Base XML-RPC URL of the Trac instance, read from the [source] section.
trac_url = config.get('source', 'url')
# Derive the sibling URLs with plain string operations. os.path.join would
# insert the platform path separator (a backslash on Windows), which is
# wrong inside a URL.
trac_url_dir = trac_url.rpartition('/')[0].rstrip('/')
trac_url_ticket = trac_url_dir + '/ticket'
trac_url_wiki = trac_url_dir + '/wiki'
trac_url_query = trac_url_dir + '/query'

trac_path = None
if config.has_option('source', 'path') :
trac_path = config.get('source', 'path')

trac_ticket_url = None
if config.has_option('source', 'ticket_url') :
trac_ticket_url = config.get('source', 'ticket_url')

github_api_url = config.get('target', 'url')
github_token = None
if config.has_option('target', 'token') :
Expand Down Expand Up @@ -137,6 +138,21 @@
if must_convert_wiki :
wiki_export_dir = config.get('wiki', 'export_dir')

# Default of the multilines flag of the function trac2markdown. Set the
# boolean option default_multilines in the source section of the
# configuration file to change it; it is False when the option is absent.
default_multilines = config.getboolean('source', 'default_multilines', fallback=False)

# Number of leading whitespaces that a line must have to be skipped in the
# function trac2markdown. Set the integer option
# skip_line_with_leading_whitespaces in the source section of the
# configuration file to change it. Zero (the value used when the option is
# absent) means that no line is skipped.
skip_line_with_leading_whitespaces = config.getint('source', 'skip_line_with_leading_whitespaces', fallback=0)

#pattern_changeset = r'(?sm)In \[changeset:"([^"/]+?)(?:/[^"]+)?"\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}'
pattern_changeset = r'(?sm)In \[changeset:"[0-9]+" ([0-9]+)\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}'
matcher_changeset = re.compile(pattern_changeset)
Expand Down Expand Up @@ -173,7 +189,7 @@ def handle_svnrev_reference(m) :
return m.group(0)


def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
def trac2markdown(text, base_path, conv_help, multilines = default_multilines) :
text = matcher_changeset.sub(format_changeset_comment, text)
text = matcher_changeset2.sub(r'\1', text)

Expand All @@ -193,12 +209,27 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
if multilines:
text = re.sub(r'^\S[^\n]+([^=-_|])\n([^\s`*0-9#=->-_|])', r'\1 \2', text)

text = re.sub(r'(?m)^======\s+(.*?)\s+======$', r'\n###### \1', text)
text = re.sub(r'(?m)^=====\s+(.*?)\s+=====$', r'\n##### \1', text)
text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'\n#### \1', text)
text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'\n### \1', text)
text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'\n## \1', text)
text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'\n# \1', text)
def convert_heading(level, text):
    """
    Return the given text with the Trac headings of the given level
    converted to Markdown headings.

    A Trac heading starts a line with ``level`` equal signs followed by
    whitespace. The closing run of equal signs may be present or not.
    """
    trac_marker = '=' * level
    markdown_marker = '#' * level

    def replace(match):
        """
        Return the replacement for one matched heading.
        """
        # Only the first group (the heading text) is used. A second group
        # may hold an explicit anchor; it is ignored since GitHub Markdown
        # sets heading anchors automatically.
        return '%s %s' % (markdown_marker, match.group(1))

    patterns = (
        # heading without closing equal signs
        r'(?m)^%s\s+([^=]+)[^\n=]*([\#][\w-]*)?$' % trac_marker,
        # heading enclosed in equal signs on both sides
        r'(?m)^%s\s+(.*?)\s+%s[^\n]*([\#][\w-]*)?$' % (trac_marker, trac_marker),
    )
    for pattern in patterns:
        text = re.sub(pattern, replace, text)
    return text

for level in [6, 5, 4, 3, 2, 1]:
text = convert_heading(level, text)

text = re.sub(r'^ * ', r'****', text)
text = re.sub(r'^ * ', r'***', text)
text = re.sub(r'^ * ', r'**', text)
Expand All @@ -208,44 +239,51 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
a = []
is_table = False
for line in text.split('\n'):
if not line.startswith(' '):
line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line)
line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text
line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line)
line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line)
line = re.sub(r'\[wiki:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line)
line = re.sub(r'\[wiki:([^\s\[\]]+)\]', r'[\1](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line) # link without display text
line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line)
line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line)
line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line)
line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
if trac_ticket_url:
# as long as the ticket themselfs have not been migrated they should reference to the original place
line = re.sub(r'\#([1-9]\d{0,4})', r'[#\1](%s/\1)' % trac_ticket_url, line)
if line.startswith('||'):
if not is_table:
sep = re.sub(r'\|\|=', r'||:', line) # take care of left align
sep = re.sub(r'=\|\|', r':||', sep) # take care of right align
sep = re.sub(r'[^|,^:]', r'-', sep)
line = line + '\n' + sep
is_table = True
# The wiki markup allows the alignment directives to be specified on a cell-by-cell
# basis. This is used in many examples. AFAIK this can't be properly translated into
# the GitHub markdown as it only allows to align statements column by column.
line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions
line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions
line = re.sub(r'\|\|', r'|', line)
else:
if skip_line_with_leading_whitespaces:
if line.startswith(' '*skip_line_with_leading_whitespaces):
is_table = False
continue

line = re.sub(r'\[query:\?', r'[%s?' % trac_url_query, line) # preconversion to URL format
line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line)
line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text
line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line)
line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line)
line = re.sub(r'\[wiki:"([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]', conv_help.wiki_link, line) # for pagenames containing whitespaces
line = re.sub(r'\[wiki:([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', conv_help.wiki_link, line)
line = re.sub(r'\[wiki:([^\s\[\]]+)\]', conv_help.wiki_link, line) # link without display text
line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', conv_help.wiki_link, line)
line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line)
line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line)
line = re.sub(r'\[\["([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]\]', conv_help.wiki_link, line) # alternative wiki page reference for pagenames containing whitespaces
line = re.sub(r'\[\[([^\[\]\|]+)[\|]+\s*([^\[\]\|]+)?\]\]', conv_help.wiki_link, line) # alternative wiki page reference 2 for pagenames containing whitespaces
line = re.sub(r'\[\[([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference
line = re.sub(r'\[\[([^\s\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference without display text
line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
line = re.sub(r'[\s]%s/([1-9]\d{0,4})' % trac_url_ticket, r' #\1', line) # replace global ticket references
line = re.sub(r'\#([1-9]\d{0,4})', conv_help.ticket_link, line)
if line.startswith('||'):
if not is_table:
sep = re.sub(r'\|\|=', r'||:', line) # take care of left align
sep = re.sub(r'=\|\|', r':||', sep) # take care of right align
sep = re.sub(r'[^|,^:]', r'-', sep)
line = line + '\n' + sep
is_table = True
# The wiki markup allows the alignment directives to be specified on a cell-by-cell
# basis. This is used in many examples. AFAIK this can't be properly translated into
# the GitHub markdown as it only allows to align statements column by column.
line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions
line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions
line = re.sub(r'\|\|', r'|', line)
else:
is_table = False
a.append(line)
text = '\n'.join(a)
text = '\n'.join(a)
return text


Expand Down Expand Up @@ -375,12 +413,14 @@ def gh_username(dest, origname) :
def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
milestone_map = {}

conv_help = ConversionHelper(source)

if migrate_milestones:
for milestone_name in source.ticket.milestone.getAll():
milestone = source.ticket.milestone.get(milestone_name)
print("Creating milestone " + milestone['name'])
new_milestone = {
'description' : trac2markdown(milestone['description'], '/milestones/', False),
'description' : trac2markdown(milestone['description'], '/milestones/', conv_help, False),
'title' : milestone['name'],
'state' : 'open' if str(milestone['completed']) == '0' else 'closed'
}
Expand Down Expand Up @@ -554,7 +594,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
if keywords != '' and not keywords_to_labels :
description_pre += 'Keywords: ' + keywords + '\n\n'

description = description_pre + trac2markdown(description, '/issues/', False)
description = description_pre + trac2markdown(description, '/issues/', conv_help, False)
#assert description.find('/wiki/') < 0, description

# collect all parameters
Expand Down Expand Up @@ -603,7 +643,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
# empty description and not description of attachment
continue
note = {
'note' : trac2markdown(desc, '/issues/', False)
'note' : trac2markdown(desc, '/issues/', conv_help, False)
}
if attachment is not None :
note['attachment_name'] = attachment[4] # name of attachment
Expand Down Expand Up @@ -692,7 +732,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
gh_comment_issue(dest, issue, { 'note' : 'Changing type from ' + change[3] + ' to ' + change[4] + '.', 'created_at' : change_time, 'author' : author })
gh_update_issue_property(dest, issue, 'labels', labels)
elif change_type == "description" :
issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', False) + '\n\n(changed by ' + author + ' at ' + change_time + ')'
issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', conv_help, False) + '\n\n(changed by ' + author + ' at ' + change_time + ')'
gh_update_issue_property(dest, issue, 'description', issue_data['description'])
elif change_type == "summary" :
issue_data['title'] = change[4]
Expand Down Expand Up @@ -744,13 +784,15 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
sleep(sleep_after_10tickets)


def convert_wiki(source, dest, trac_ticket_url):
def convert_wiki(source, dest):
exclude_authors = ['trac']

if not os.path.isdir(wiki_export_dir) :
os.makedirs(wiki_export_dir)

client.MultiCall(source)
conv_help = ConversionHelper(source)

for pagename in source.wiki.getAllPages() :
info = source.wiki.getPageInfo(pagename)
if info['author'] in exclude_authors :
Expand All @@ -760,7 +802,7 @@ def convert_wiki(source, dest, trac_ticket_url):
print ("Migrate Wikipage", pagename)
if pagename == 'WikiStart' :
pagename = 'Home'
converted = trac2markdown(page, os.path.dirname('/wiki/%s' % pagename), trac_ticket_url=trac_ticket_url)
converted = trac2markdown(page, os.path.dirname('/wiki/%s' % pagename), conv_help)

attachments = []
for attachment in source.wiki.listAttachments(pagename if pagename != 'Home' else 'WikiStart') :
Expand Down Expand Up @@ -798,6 +840,82 @@ def convert_wiki(source, dest, trac_ticket_url):
codecs.open(outfile, 'w', 'utf-8').write(converted)


class ConversionHelper:
    """
    A class that provides conversion methods that depend on information collected
    at startup, such as Wiki page names and configuration flags.

    The methods ``ticket_link`` and ``wiki_link`` take a match object and
    return the replacement string, so they can be passed as the replacement
    callback to ``re.sub``.
    """
    def __init__(self, source):
        """
        Collect all the necessary information.

        ``source`` must provide ``wiki.getAllPages()``, which is used to
        obtain the names of all wiki pages. The flag
        ``keep_trac_ticket_references`` is read from the ``source`` section
        of the module-level ``config`` and defaults to ``False``.
        """
        pagenames = source.wiki.getAllPages()

        # Every path component of every page name. A page name without a
        # slash is its own single component. Sets are used because these
        # collections are only ever queried for membership.
        components = set()
        for name in pagenames:
            components.update(name.split('/'))

        self._pagenames_splitted = components
        # Full page names that do not also occur as a single component.
        self._pagenames_not_splitted = {p for p in pagenames if p not in components}
        self._keep_trac_ticket_references = False
        if config.has_option('source', 'keep_trac_ticket_references'):
            self._keep_trac_ticket_references = config.getboolean('source', 'keep_trac_ticket_references')

    def ticket_link(self, match):
        """
        Return a formatted string that replaces the match object found by re.

        The first group of ``match`` must be the ticket number.
        """
        ticket = match.group(1)
        if self._keep_trac_ticket_references:
            # as long as the tickets themselves have not been migrated they
            # should refer to the original place
            return r'[#%s](%s/%s)' % (ticket, trac_url_ticket, ticket)
        # leave them as is
        return r'#%s' % ticket

    def wiki_link(self, match):
        """
        Return a formatted string that replaces the match object found by re.

        The first group of ``match`` is the link target (a page name, an
        URL or a macro call). An optional second group is the display text;
        if it is missing or empty the target itself is displayed.
        """
        groups = match.groups()
        pagename = groups[0]
        display = groups[1] if len(groups) > 1 else None
        if not display:
            display = pagename

        # take care of section references: the page lookup uses the part in
        # front of the first '#', the link keeps the complete target
        pagename_ori = pagename
        pagename = pagename.split('#')[0]

        if pagename.startswith('http'):
            link = pagename_ori
        elif pagename in self._pagenames_splitted:
            link = pagename_ori
        elif pagename in self._pagenames_not_splitted:
            # link to the last path component only
            link = pagename_ori.split('/')[-1]
        else:
            # we assume that this must be a Trac macro like PageOutline
            # first let's extract the arguments; splitting only once keeps
            # arguments that contain parentheses themselves intact
            macro_split = pagename.split('(', 1)
            macro = macro_split[0]
            args = macro_split[1] if len(macro_split) > 1 else None
            display = 'This is the Trac macro *%s* that was inherited from the migration' % macro
            link = '%s/WikiMacros#%s-macro' % (trac_url_wiki, macro)
            if args:
                # args still carries the closing parenthesis of the macro call
                return r'[%s](%s) called with arguments (%s' % (display, link, args)
        return r'[%s](%s)' % (display, link)


if __name__ == "__main__":
source = client.ServerProxy(trac_url)

Expand Down Expand Up @@ -841,6 +959,6 @@ def convert_wiki(source, dest, trac_ticket_url):
convert_issues(source, dest, only_issues = only_issues, blacklist_issues = blacklist_issues)

if must_convert_wiki:
convert_wiki(source, dest, trac_ticket_url)
convert_wiki(source, dest)

print(f'Unmapped users: {sorted(unmapped_users)}')