diff --git a/migrate.cfg.sagetracwikionly b/migrate.cfg.sagetracwikionly index 471ced9912..35da8081e3 100644 --- a/migrate.cfg.sagetracwikionly +++ b/migrate.cfg.sagetracwikionly @@ -7,8 +7,8 @@ # unauthenticated works for globally readable trac instances url: https://trac.sagemath.org/xmlrpc -# Optional ticket_url if links to the Trac tickets should be set in md-documents -ticket_url: https://trac.sagemath.org/ticket +# Should references to tickets still point to Trac? +keep_trac_ticket_references: yes # authentication broken with python3.8 or later, due to # https://github.com/python/cpython/issues/82219 diff --git a/migrate.py b/migrate.py index 7be0b4aa50..661c748e6e 100755 --- a/migrate.py +++ b/migrate.py @@ -87,14 +87,15 @@ config.read('migrate.cfg') trac_url = config.get('source', 'url') +trac_url_dir = os.path.dirname(trac_url) +trac_url_ticket = os.path.join(trac_url_dir, 'ticket') +trac_url_wiki = os.path.join(trac_url_dir, 'wiki') +trac_url_query = os.path.join(trac_url_dir, 'query') + trac_path = None if config.has_option('source', 'path') : trac_path = config.get('source', 'path') -trac_ticket_url = None -if config.has_option('source', 'ticket_url') : - trac_ticket_url = config.get('source', 'ticket_url') - github_api_url = config.get('target', 'url') github_token = None if config.has_option('target', 'token') : @@ -137,6 +138,21 @@ if must_convert_wiki : wiki_export_dir = config.get('wiki', 'export_dir') +default_multilines = False +if config.has_option('source', 'default_multilines') : + # set this boolean in the source section of the configuration file + # to change the default of the multilines flag in the function + # trac2markdown + default_multilines = config.getboolean('source', 'default_multilines') + +skip_line_with_leading_whitespaces = 0 +if config.has_option('source', 'skip_line_with_leading_whitespaces') : + # set this integer in the source section of the configuration file + # to the number of leading whitespaces that a line must have 
to + # be skipped in the function trac2markdown. Zero means that no + # line is skipped. + skip_line_with_leading_whitespaces = config.getint('source', 'skip_line_with_leading_whitespaces') + #pattern_changeset = r'(?sm)In \[changeset:"([^"/]+?)(?:/[^"]+)?"\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}' pattern_changeset = r'(?sm)In \[changeset:"[0-9]+" ([0-9]+)\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}' matcher_changeset = re.compile(pattern_changeset) @@ -173,7 +189,7 @@ def handle_svnrev_reference(m) : return m.group(0) -def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) : +def trac2markdown(text, base_path, conv_help, multilines = default_multilines) : text = matcher_changeset.sub(format_changeset_comment, text) text = matcher_changeset2.sub(r'\1', text) @@ -193,12 +209,27 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) : if multilines: text = re.sub(r'^\S[^\n]+([^=-_|])\n([^\s`*0-9#=->-_|])', r'\1 \2', text) - text = re.sub(r'(?m)^======\s+(.*?)\s+======$', r'\n###### \1', text) - text = re.sub(r'(?m)^=====\s+(.*?)\s+=====$', r'\n##### \1', text) - text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'\n#### \1', text) - text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'\n### \1', text) - text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'\n## \1', text) - text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'\n# \1', text) + def convert_heading(level, text): + """ + Return the given text with converted headdings + """ + def replace(match): + """ + Return the replacement for the headding + """ + heading = match.groups()[0] + # There might be a second item if an anchor is set. + # We ignore this anchor since it is automatically + # set it GitHub Markdown. 
+ return '%s %s' % (('#'*level), heading) + + text = re.sub(r'(?m)^%s\s+([^=]+)[^\n=]*([\#][\w-]*)?$' % ('='*level), replace, text) + text = re.sub(r'(?m)^%s\s+(.*?)\s+%s[^\n]*([\#][\w-]*)?$' % ('='*level, '='*level), replace, text) + return text + + for level in [6, 5, 4, 3, 2, 1]: + text = convert_heading(level, text) + text = re.sub(r'^ * ', r'****', text) text = re.sub(r'^ * ', r'***', text) text = re.sub(r'^ * ', r'**', text) @@ -208,44 +239,51 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) : a = [] is_table = False for line in text.split('\n'): - if not line.startswith(' '): - line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line) - line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text - line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line) - line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line) - line = re.sub(r'\[wiki:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line) - line = re.sub(r'\[wiki:([^\s\[\]]+)\]', r'[\1](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line) # link without display text - line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line) - line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) - line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) - line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) - line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) - line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line) - line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line) - line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line) - line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line) - if 
trac_ticket_url: - # as long as the ticket themselfs have not been migrated they should reference to the original place - line = re.sub(r'\#([1-9]\d{0,4})', r'[#\1](%s/\1)' % trac_ticket_url, line) - if line.startswith('||'): - if not is_table: - sep = re.sub(r'\|\|=', r'||:', line) # take care of left align - sep = re.sub(r'=\|\|', r':||', sep) # take care of right align - sep = re.sub(r'[^|,^:]', r'-', sep) - line = line + '\n' + sep - is_table = True - # The wiki markup allows the alignment directives to be specified on a cell-by-cell - # basis. This is used in many examples. AFAIK this can't be properly translated into - # the GitHub markdown as it only allows to align statements column by column. - line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions - line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions - line = re.sub(r'\|\|', r'|', line) - else: + if skip_line_with_leading_whitespaces: + if line.startswith(' '*skip_line_with_leading_whitespaces): is_table = False + continue + + line = re.sub(r'\[query:\?', r'[%s?' 
% trac_url_query, line) # preconversion to URL format + line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line) + line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text + line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line) + line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line) + line = re.sub(r'\[wiki:"([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]', conv_help.wiki_link, line) # for pagenames containing whitespaces + line = re.sub(r'\[wiki:([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', conv_help.wiki_link, line) + line = re.sub(r'\[wiki:([^\s\[\]]+)\]', conv_help.wiki_link, line) # link without display text + line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', conv_help.wiki_link, line) + line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) + line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) + line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) + line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line) + line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line) + line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line) + line = re.sub(r'\[\["([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]\]', conv_help.wiki_link, line) # alternative wiki page reference for pagenames containing whitespaces + line = re.sub(r'\[\[([^\[\]\|]+)[\|]+\s*([^\[\]\|]+)?\]\]', conv_help.wiki_link, line) # alternative wiki page reference 2 for pagenames containing whitespaces + line = re.sub(r'\[\[([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference + line = re.sub(r'\[\[([^\s\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference without display text + line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line) + line = 
re.sub(r'\'\'(.*?)\'\'', r'_\1_', line) + line = re.sub(r'[\s]%s/([1-9]\d{0,4})' % trac_url_ticket, r' #\1', line) # replace global ticket references + line = re.sub(r'\#([1-9]\d{0,4})', conv_help.ticket_link, line) + if line.startswith('||'): + if not is_table: + sep = re.sub(r'\|\|=', r'||:', line) # take care of left align + sep = re.sub(r'=\|\|', r':||', sep) # take care of right align + sep = re.sub(r'[^|,^:]', r'-', sep) + line = line + '\n' + sep + is_table = True + # The wiki markup allows the alignment directives to be specified on a cell-by-cell + # basis. This is used in many examples. AFAIK this can't be properly translated into + # the GitHub markdown as it only allows to align statements column by column. + line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions + line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions + line = re.sub(r'\|\|', r'|', line) else: is_table = False a.append(line) - text = '\n'.join(a) + text = '\n'.join(a) return text @@ -375,12 +413,14 @@ def gh_username(dest, origname) : def convert_issues(source, dest, only_issues = None, blacklist_issues = None): milestone_map = {} + conv_help = ConversionHelper(source) + if migrate_milestones: for milestone_name in source.ticket.milestone.getAll(): milestone = source.ticket.milestone.get(milestone_name) print("Creating milestone " + milestone['name']) new_milestone = { - 'description' : trac2markdown(milestone['description'], '/milestones/', False), + 'description' : trac2markdown(milestone['description'], '/milestones/', conv_help, False), 'title' : milestone['name'], 'state' : 'open' if str(milestone['completed']) == '0' else 'closed' } @@ -554,7 +594,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None): if keywords != '' and not keywords_to_labels : description_pre += 'Keywords: ' + keywords + '\n\n' - description = description_pre + trac2markdown(description, '/issues/', False) + description = description_pre + 
trac2markdown(description, '/issues/', conv_help, False) #assert description.find('/wiki/') < 0, description # collect all parameters @@ -603,7 +643,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None): # empty description and not description of attachment continue note = { - 'note' : trac2markdown(desc, '/issues/', False) + 'note' : trac2markdown(desc, '/issues/', conv_help, False) } if attachment is not None : note['attachment_name'] = attachment[4] # name of attachment @@ -692,7 +732,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None): gh_comment_issue(dest, issue, { 'note' : 'Changing type from ' + change[3] + ' to ' + change[4] + '.', 'created_at' : change_time, 'author' : author }) gh_update_issue_property(dest, issue, 'labels', labels) elif change_type == "description" : - issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', False) + '\n\n(changed by ' + author + ' at ' + change_time + ')' + issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', conv_help, False) + '\n\n(changed by ' + author + ' at ' + change_time + ')' gh_update_issue_property(dest, issue, 'description', issue_data['description']) elif change_type == "summary" : issue_data['title'] = change[4] @@ -744,13 +784,15 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None): sleep(sleep_after_10tickets) -def convert_wiki(source, dest, trac_ticket_url): +def convert_wiki(source, dest): exclude_authors = ['trac'] if not os.path.isdir(wiki_export_dir) : os.makedirs(wiki_export_dir) client.MultiCall(source) + conv_help = ConversionHelper(source) + for pagename in source.wiki.getAllPages() : info = source.wiki.getPageInfo(pagename) if info['author'] in exclude_authors : @@ -760,7 +802,7 @@ def convert_wiki(source, dest, trac_ticket_url): print ("Migrate Wikipage", pagename) if pagename == 'WikiStart' : pagename = 'Home' - converted = trac2markdown(page, 
class ConversionHelper:
    """
    Conversion callbacks that depend on information collected at startup,
    such as Wiki page names and configuration flags.

    ``ticket_link`` and ``wiki_link`` are written to be passed as the
    replacement function to ``re.sub``: each takes a match object and
    returns the replacement text.

    The class reads the module-level names ``config``, ``trac_url_ticket``
    and ``trac_url_wiki``.
    """
    def __init__(self, source):
        """
        Collect all the necessary information.

        ``source`` must provide ``source.wiki.getAllPages()`` yielding the
        Wiki page names as strings (sub-pages separated by ``/``).
        """
        pagenames = list(source.wiki.getAllPages())

        # Every '/'-separated component of every page name.  A page without
        # any '/' is its own (single) component and therefore ends up here.
        components = set()
        for pagename in pagenames:
            components.update(pagename.split('/'))

        # Sets instead of lists: both collections are only used for
        # membership tests, once per converted link.
        self._pagenames_splitted = frozenset(components)
        # Full page names that are not themselves a component, i.e. the
        # nested names such as 'Parent/Child'.
        self._pagenames_not_splitted = frozenset(pagenames) - components

        self._keep_trac_ticket_references = False
        if config.has_option('source', 'keep_trac_ticket_references'):
            self._keep_trac_ticket_references = config.getboolean('source', 'keep_trac_ticket_references')

    def ticket_link(self, match):
        """
        Return the replacement for a ticket reference found by re.

        The first group of ``match`` must be the ticket number.
        """
        ticket = match.group(1)
        if self._keep_trac_ticket_references:
            # as long as the tickets themselves have not been migrated they
            # should refer to the original place
            return '[#%s](%s/%s)' % (ticket, trac_url_ticket, ticket)
        # leave the reference as is
        return '#%s' % ticket

    def wiki_link(self, match):
        """
        Return the replacement for a Wiki link found by re.

        The first group of ``match`` is the link target (a page name,
        optionally followed by ``#section``, or a URL, or a macro call).
        An optional second group is the display text; if it is missing or
        empty the target is displayed.
        """
        groups = match.groups()
        target = groups[0]
        display = (groups[1] if len(groups) > 1 else None) or target

        # take care of section references: the page lookup ignores the
        # '#section' part, the generated link keeps it
        pagename = target.split('#')[0]

        if pagename.startswith('http') or pagename in self._pagenames_splitted:
            link = target
        elif pagename in self._pagenames_not_splitted:
            # nested page name: link to its last path component
            link = target.split('/')[-1]
        else:
            # we assume that this must be a Trac macro like PageOutline
            # first let's extract the arguments; partition keeps everything
            # after the first '(' so nested parentheses are not truncated
            macro, _, args = pagename.partition('(')
            display = 'This is the Trac macro *%s* that was inherited from the migration' % macro
            link = '%s/WikiMacros#%s-macro' % (trac_url_wiki, macro)
            if args:
                # args still carries the closing ')' of the macro call
                return '[%s](%s) called with arguments (%s' % (display, link, args)
        return '[%s](%s)' % (display, link)