sagemath · mkoeppe · Dec 9, 2022 · Nov 24, 2022 · Nov 26, 2022 · Dec 2, 2022
diff --git a/migrate.cfg.sagetracwikionly b/migrate.cfg.sagetracwikionly
@@ -7,8 +7,8 @@
 # unauthenticated works for globally readable trac instances
 url: https://trac.sagemath.org/xmlrpc
 
-# Optional ticket_url if links to the Trac tickets should be set in md-documents
-ticket_url: https://trac.sagemath.org/ticket
+# Should references to tickets still point to Trac?
+keep_trac_ticket_references: yes
 
 # authentication broken with python3.8 or later, due to
 # https://github.com/python/cpython/issues/82219

diff --git a/migrate.py b/migrate.py
@@ -87,14 +87,15 @@
     config.read('migrate.cfg')
 
 trac_url = config.get('source', 'url')
+trac_url_dir = trac_url.rsplit('/', 1)[0]  # plain string handling: os.path.join would insert '\\' on Windows
+trac_url_ticket = trac_url_dir + '/ticket'
+trac_url_wiki = trac_url_dir + '/wiki'
+trac_url_query = trac_url_dir + '/query'
+
 trac_path = None
 if config.has_option('source', 'path') :
     trac_path = config.get('source', 'path')
 
-trac_ticket_url = None
-if config.has_option('source', 'ticket_url') :
-    trac_ticket_url = config.get('source', 'ticket_url')
-
 github_api_url = config.get('target', 'url')
 github_token = None
 if config.has_option('target', 'token') :
@@ -137,6 +138,21 @@
 if must_convert_wiki :
     wiki_export_dir = config.get('wiki', 'export_dir')
 
+default_multilines = False
+if config.has_option('source', 'default_multilines') :
+    # set this boolean in the source section of the configuration file
+    # to change the default of the multilines flag in the function
+    # trac2markdown
+    default_multilines = config.getboolean('source', 'default_multilines')
+
+skip_line_with_leading_whitespaces = 0
+if config.has_option('source', 'skip_line_with_leading_whitespaces') :
+    # set this integer in the source section of the configuration file
+    # to the number of leading whitespaces that a line must have to
+    # be skipped in the function trac2markdown. Zero means that no
+    # line is skipped.
+    skip_line_with_leading_whitespaces = config.getint('source', 'skip_line_with_leading_whitespaces')
+
 #pattern_changeset = r'(?sm)In \[changeset:"([^"/]+?)(?:/[^"]+)?"\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}'
 pattern_changeset = r'(?sm)In \[changeset:"[0-9]+" ([0-9]+)\]:\n\{\{\{(\n#![^\n]+)?\n(.*?)\n\}\}\}'
 matcher_changeset = re.compile(pattern_changeset)
@@ -173,7 +189,7 @@ def handle_svnrev_reference(m) :
         return m.group(0)
 
 
-def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
+def trac2markdown(text, base_path, conv_help, multilines = default_multilines) :
     text = matcher_changeset.sub(format_changeset_comment, text)
     text = matcher_changeset2.sub(r'\1', text)
 
@@ -193,12 +209,27 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
     if multilines:
         text = re.sub(r'^\S[^\n]+([^=-_|])\n([^\s`*0-9#=->-_|])', r'\1 \2', text)
 
-    text = re.sub(r'(?m)^======\s+(.*?)\s+======$', r'\n###### \1', text)
-    text = re.sub(r'(?m)^=====\s+(.*?)\s+=====$', r'\n##### \1', text)
-    text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'\n#### \1', text)
-    text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'\n### \1', text)
-    text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'\n## \1', text)
-    text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'\n# \1', text)
+    def convert_heading(level, text):
+        """
+        Return the given text with the Trac headings of the given level converted to Markdown
+        """
+        def replace(match):
+            """
+            Return the replacement for the heading
+            """
+            heading = match.groups()[0]
+            # There might be a second item if an anchor is set.
+            # We ignore this anchor since it is automatically
+            # set in GitHub Markdown.
+            return '%s %s' % (('#'*level), heading)
+
+        text = re.sub(r'(?m)^%s\s+([^=]+)[^\n=]*([\#][\w-]*)?$' % ('='*level), replace, text)
+        text = re.sub(r'(?m)^%s\s+(.*?)\s+%s[^\n]*([\#][\w-]*)?$' % ('='*level, '='*level), replace, text)
+        return text
+
+    for level in [6, 5, 4, 3, 2, 1]:
+        text = convert_heading(level, text)
+
     text = re.sub(r'^             * ', r'****', text)
     text = re.sub(r'^         * ', r'***', text)
     text = re.sub(r'^     * ', r'**', text)
@@ -208,44 +239,51 @@ def trac2markdown(text, base_path, multilines = True, trac_ticket_url=None) :
     a = []
     is_table = False
     for line in text.split('\n'):
-        if not line.startswith('    '):
-            line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line)
-            line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text
-            line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line)
-            line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line)
-            line = re.sub(r'\[wiki:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line)
-            line = re.sub(r'\[wiki:([^\s\[\]]+)\]', r'[\1](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line) # link without display text
-            line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1.md)' % os.path.relpath('/wiki/', base_path), line)
-            line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
-            line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
-            line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
-            line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
-            line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line)
-            line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line)
-            line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
-            line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
-            if trac_ticket_url:
-                # as long as the ticket themselfs have not been migrated they should reference to the original place
-                line = re.sub(r'\#([1-9]\d{0,4})', r'[#\1](%s/\1)' % trac_ticket_url, line)
-            if line.startswith('||'):
-                if not is_table:
-                    sep = re.sub(r'\|\|=', r'||:', line) # take care of left align
-                    sep = re.sub(r'=\|\|', r':||', sep)  # take care of right align
-                    sep = re.sub(r'[^|,^:]', r'-', sep)
-                    line = line + '\n' + sep
-                    is_table = True
-                # The wiki markup allows the alignment directives to be specified on a cell-by-cell
-                # basis. This is used in many examples. AFAIK this can't be properly translated into
-                # the GitHub markdown as it only allows to align statements column by column.
-                line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions
-                line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions
-                line = re.sub(r'\|\|', r'|', line)
-            else:
+        if skip_line_with_leading_whitespaces:
+            if line.startswith(' '*skip_line_with_leading_whitespaces):
                 is_table = False
+                continue
+
+        line = re.sub(r'\[query:\?', r'[%s?' % trac_url_query, line) # preconversion to URL format
+        line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', r'[\2](\1)', line)
+        line = re.sub(r'\[\[(https?://[^\s\[\]\|]+)\]\]', r'[\1](\1)', line) # link without display text
+        line = re.sub(r'\[(https?://[^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', r'[\2](\1)', line)
+        line = re.sub(r'\[(https?://[^\s\[\]\|]+)\]', r'[\1](\1)', line)
+        line = re.sub(r'\[wiki:"([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]', conv_help.wiki_link, line) # for pagenames containing whitespaces
+        line = re.sub(r'\[wiki:([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]', conv_help.wiki_link, line)
+        line = re.sub(r'\[wiki:([^\s\[\]]+)\]', conv_help.wiki_link, line) # link without display text
+        line = re.sub(r'\[/wiki/([^\s\[\]]+)\s+([^\[\]]+)\]', conv_help.wiki_link, line)
+        line = re.sub(r'\[source:([^\s\[\]]+)\s+([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
+        line = re.sub(r'source:([\S]+)', r'[\1](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
+        line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
+        line = re.sub(r'\[\[Image\(source:([^(]+)\)\]\]', r'![](%s/\1)' % os.path.relpath('/tree/master/', base_path), line)
+        line = re.sub(r'\[\[Image\(([^(]+),\slink=([^(]+)\)\]\]', r'![\2](\1)', line)
+        line = re.sub(r'\[\[Image\(([^(]+)\)\]\]', r'![](\1)', line)
+        line = re.sub(r'\[\["([^\[\]\|]+)["]\s*([^\[\]"]+)?["]?\]\]', conv_help.wiki_link, line) # alternative wiki page reference for pagenames containing whitespaces
+        line = re.sub(r'\[\[([^\[\]\|]+)[\|]+\s*([^\[\]\|]+)?\]\]', conv_help.wiki_link, line) # alternative wiki page reference 2 for pagenames containing whitespaces
+        line = re.sub(r'\[\[([^\s\[\]\|]+)\s*[\s\|]\s*([^\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference
+        line = re.sub(r'\[\[([^\s\[\]]+)\]\]', conv_help.wiki_link, line) # alternative wiki page reference without display text
+        line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
+        line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
+        line = re.sub(r'[\s]%s/([1-9]\d{0,4})' % trac_url_ticket, r' #\1', line) # replace global ticket references
+        line = re.sub(r'\#([1-9]\d{0,4})', conv_help.ticket_link, line)
+        if line.startswith('||'):
+            if not is_table:
+                sep = re.sub(r'\|\|=', r'||:', line) # take care of left align
+                sep = re.sub(r'=\|\|', r':||', sep)  # take care of right align
+                sep = re.sub(r'[^|,^:]', r'-', sep)
+                line = line + '\n' + sep
+                is_table = True
+            # The wiki markup allows the alignment directives to be specified on a cell-by-cell
+            # basis. This is used in many examples. AFAIK this can't be properly translated into
+            # the GitHub markdown as it only allows to align statements column by column.
+            line = re.sub(r'\|\|=', r'||', line) # ignore cellwise align instructions
+            line = re.sub(r'=\|\|', r'||', line) # ignore cellwise align instructions
+            line = re.sub(r'\|\|', r'|', line)
         else:
             is_table = False
         a.append(line)
-    text = '\n'.join(a)
+    text = '\n'.join(a)  # built once after the loop, even if every line was skipped
     return text
 
 
@@ -375,12 +413,14 @@ def gh_username(dest, origname) :
 def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
     milestone_map = {}
 
+    conv_help = ConversionHelper(source)
+
     if migrate_milestones:
         for milestone_name in source.ticket.milestone.getAll():
             milestone = source.ticket.milestone.get(milestone_name)
             print("Creating milestone " + milestone['name'])
             new_milestone = {
-                'description' : trac2markdown(milestone['description'], '/milestones/', False),
+                'description' : trac2markdown(milestone['description'], '/milestones/', conv_help, False),
                 'title' : milestone['name'],
                 'state' : 'open' if str(milestone['completed']) == '0'  else 'closed'
             }
@@ -554,7 +594,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
         if keywords != '' and not keywords_to_labels :
             description_pre += 'Keywords: ' + keywords + '\n\n'
 
-        description = description_pre + trac2markdown(description, '/issues/', False)
+        description = description_pre + trac2markdown(description, '/issues/', conv_help, False)
         #assert description.find('/wiki/') < 0, description
 
         # collect all parameters
@@ -603,7 +643,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
                     # empty description and not description of attachment
                     continue
                 note = {
-                    'note' : trac2markdown(desc, '/issues/', False)
+                    'note' : trac2markdown(desc, '/issues/', conv_help, False)
                 }
                 if attachment is not None :
                     note['attachment_name'] = attachment[4]  # name of attachment
@@ -692,7 +732,7 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
                 gh_comment_issue(dest, issue, { 'note' : 'Changing type from ' + change[3] + ' to ' + change[4] + '.', 'created_at' : change_time, 'author' : author })
                 gh_update_issue_property(dest, issue, 'labels', labels)
             elif change_type == "description" :
-                issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', False) + '\n\n(changed by ' + author + ' at ' + change_time + ')'
+                issue_data['description'] = description_pre + trac2markdown(change[4], '/issues/', conv_help, False) + '\n\n(changed by ' + author + ' at ' + change_time + ')'
                 gh_update_issue_property(dest, issue, 'description', issue_data['description'])
             elif change_type == "summary" :
                 issue_data['title'] = change[4]
@@ -744,13 +784,15 @@ def convert_issues(source, dest, only_issues = None, blacklist_issues = None):
             sleep(sleep_after_10tickets)
 
 
-def convert_wiki(source, dest, trac_ticket_url):
+def convert_wiki(source, dest):
     exclude_authors = ['trac']
 
     if not os.path.isdir(wiki_export_dir) :
         os.makedirs(wiki_export_dir)
 
     client.MultiCall(source)
+    conv_help = ConversionHelper(source)
+
     for pagename in source.wiki.getAllPages() :
         info = source.wiki.getPageInfo(pagename)
         if info['author'] in exclude_authors :
@@ -760,7 +802,7 @@ def convert_wiki(source, dest, trac_ticket_url):
         print ("Migrate Wikipage", pagename)
         if pagename == 'WikiStart' :
             pagename = 'Home'
-        converted = trac2markdown(page, os.path.dirname('/wiki/%s' % pagename), trac_ticket_url=trac_ticket_url)
+        converted = trac2markdown(page, os.path.dirname('/wiki/%s' % pagename), conv_help)
 
         attachments = []
         for attachment in source.wiki.listAttachments(pagename if pagename != 'Home' else 'WikiStart') :
@@ -798,6 +840,82 @@ def convert_wiki(source, dest, trac_ticket_url):
             codecs.open(outfile, 'w', 'utf-8').write(converted)
 
 
+class ConversionHelper:
+    """
+    A class that provides conversion methods that depend on information collected
+    at startup, such as Wiki page names and configuration flags.
+    """
+    def __init__(self, source):
+        """
+        Collect the Wiki page names from ``source`` and read the configuration flags.
+        """
+        pagenames = source.wiki.getAllPages()
+        # sets, because wiki_link tests membership once per converted link
+        pagenames_splitted = set()
+        for p in pagenames:
+            pagenames_splitted.update(p.split('/'))
+        pagenames_not_splitted = {p for p in pagenames if p not in pagenames_splitted}
+
+        self._pagenames_splitted = pagenames_splitted
+        self._pagenames_not_splitted = pagenames_not_splitted
+        self._keep_trac_ticket_references = False
+        if config.has_option('source', 'keep_trac_ticket_references') :
+            self._keep_trac_ticket_references = config.getboolean('source', 'keep_trac_ticket_references')
+
+    def ticket_link(self, match):
+        """
+        Return the replacement for a ticket reference matched by re (group 1 is the ticket number).
+        """
+        ticket = match.group(1)
+        if self._keep_trac_ticket_references:
+            # as long as the tickets themselves have not been migrated they should reference the original place
+            return r'[#%s](%s/%s)' % (ticket, trac_url_ticket, ticket)
+        else:
+            # leave them as is
+            return r'#%s' % ticket
+
+    def wiki_link(self, match):
+        """
+        Return the Markdown link that replaces a Trac wiki link matched by re.
+
+        Group 1 of ``match`` is the page name (optionally with a ``#section``),
+        the optional group 2 is the display text. Names that are neither URLs
+        nor known Wiki pages are treated as Trac macros and linked to the
+        WikiMacros page of the Trac instance.
+        """
+        mg = match.groups()
+        pagename = mg[0]
+        # fall back to the page name if there is no (or an empty) display text
+        display = (mg[1] if len(mg) > 1 else None) or pagename
+
+        # take care of section references
+        pagename_ori = pagename
+        pagename = pagename.split('#')[0]
+
+        if pagename.startswith('http'):
+            # looks like an external URL: keep it untouched
+            link = pagename_ori
+        elif pagename in self._pagenames_splitted:
+            link = pagename_ori
+        elif pagename in self._pagenames_not_splitted:
+            # full page name with path: link to its last component
+            link = pagename_ori.rsplit('/', 1)[-1]
+        else:
+            # we assume that this must be a Trac macro like PageOutline
+            # first let's extract the arguments
+            macro_split = pagename.split('(')
+            macro = macro_split[0]
+            args = None
+            if len(macro_split) > 1:
+                args =  macro_split[1]
+            display = 'This is the Trac macro *%s* that was inherited from the migration' % macro
+            link = '%s/WikiMacros#%s-macro' % (trac_url_wiki, macro)
+            if args:
+                # presumably args still ends with the macro's ')' - hence no closing parenthesis here
+                return r'[%s](%s) called with arguments (%s' % (display, link, args)
+        return r'[%s](%s)' % (display, link)
+
+
 if __name__ == "__main__":
     source = client.ServerProxy(trac_url)
 
@@ -841,6 +959,6 @@ def convert_wiki(source, dest, trac_ticket_url):
         convert_issues(source, dest, only_issues = only_issues, blacklist_issues = blacklist_issues)
 
     if must_convert_wiki:
-        convert_wiki(source, dest, trac_ticket_url)
+        convert_wiki(source, dest)
 
     print(f'Unmapped users: {sorted(unmapped_users)}')