From 4aff509e6cba68a756e23c3cddb3491472a30e51 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 14:49:03 +0200 Subject: no point in keeping two readmes --- README | 43 ------------------------------------------- README.md | 6 ++++++ 2 files changed, 6 insertions(+), 43 deletions(-) delete mode 100644 README diff --git a/README b/README deleted file mode 100644 index 4211bca..0000000 --- a/README +++ /dev/null @@ -1,43 +0,0 @@ -sf2github README -================ - -`sf2github` is a Python program that reads an XML export from a SourceForge project and pushes this data to GitHub via its REST API. - -The script is currently very incomplete and barely tested. If it works for you, great; if not, fix it up and send me a pull request! Currently, only migration of tracker issues is partly implemented, and there's no error handling. - -Also note that the GitHub API is quite slow, taking about 5 seconds per request on my machine and internet connection. Migration of a large project will take a while. - -Issue migration ---------------- - -What works (for me): - -* SF tracker issues become GitHub tracker issues. -* Comments on SF become comments in GitHub. -* Groups and categories on SF both become labels on GitHub. -* Issues with a status that is exactly the text "Closed" or "Deleted" will be closed on GitHub. - -Limitations: - -* Only a single tracker is supported, though this could be easily fixed. -* All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter. -* There's some rubbish in the comment text sometimes (Logged In, user_id, Originator) but this is in the SF XML export. -* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points gets encoded as XML entities. The script does not work around this. See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character - -Code migration --------------- - -This script doesn't help you to migrate code from SF's Subversion to GitHub. However, I found the following page helpful in doing that: http://help.github.com/svn-importing/ - -Usage ------ - -Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`: - - ./issues.py foo.xml john/bar - -License -------- - -This software is in the public domain. I accept no responsibility for any damage resulting from it. Use at your own risk. - diff --git a/README.md b/README.md index 28998a6..4211bca 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,12 @@ Limitations: * Only a single tracker is supported, though this could be easily fixed. * All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter. * There's some rubbish in the comment text sometimes (Logged In, user_id, Originator) but this is in the SF XML export. +* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points gets encoded as XML entities. The script does not work around this. See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character + +Code migration +-------------- + +This script doesn't help you to migrate code from SF's Subversion to GitHub. However, I found the following page helpful in doing that: http://help.github.com/svn-importing/ Usage ----- -- cgit v1.2.1 From 18d535bae1b0b7d466978dac83d273fb022b4dbd Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 15:04:34 +0200 Subject: link to xml export description --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4211bca..ec1173e 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,9 @@ This script doesn't help you to migrate code from SF's Subversion to GitHub. How Usage ----- -Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`: +From SourceForge, you need to export the tracker data in XML. Read [here](https://sourceforge.net/apps/trac/sourceforge/wiki/XML%20export) for instructions. + +Run the `issues.py` script and it will print further instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`: ./issues.py foo.xml john/bar -- cgit v1.2.1 From ae0ee6da8afd1e9ec9454740697afd05ba44fc7b Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 15:59:42 +0200 Subject: better_exchook.py from https://github.com/albertz/py_better_exchook --- better_exchook.py | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 better_exchook.py diff --git a/better_exchook.py b/better_exchook.py new file mode 100644 index 0000000..083cbc2 --- /dev/null +++ b/better_exchook.py @@ -0,0 +1,211 @@ + +# by Albert Zeyer, www.az2000.de +# code under GPLv3+ +# 2011-04-15 + +# This is a simple replacement for the standard Python exception handler (sys.excepthook). +# In addition to what the standard handler does, it also prints all referenced variables +# (no matter if local, global or builtin) of the code line of each stack frame. +# See below for some examples and some example output. + +import sys + +def parse_py_statement(line): + state = 0 + curtoken = "" + spaces = " \t\n" + ops = ".,;:+-*/%&=(){}[]^<>" + i = 0 + def _escape_char(c): + if c == "n": return "\n" + elif c == "t": return "\t" + else: return c + while i < len(line): + c = line[i] + i += 1 + if state == 0: + if c in spaces: pass + elif c in ops: yield ("op", c) + elif c == "#": state = 6 + elif c == "\"": state = 1 + elif c == "'": state = 2 + else: + curtoken = c + state = 3 + elif state == 1: # string via " + if c == "\\": state = 4 + elif c == "\"": + yield ("str", curtoken) + curtoken = "" + state = 0 + else: curtoken += c + elif state == 2: # string via ' + if c == "\\": state = 5 + elif c == "'": + yield ("str", curtoken) + curtoken = "" + state = 0 + else: curtoken += c + elif state == 3: # identifier + if c in spaces + ops + "#\"'": + yield ("id", curtoken) + curtoken = "" + state = 0 + i -= 1 + else: curtoken += c + elif state == 4: # escape in " + curtoken += _escape_char(c) + state = 1 + elif state == 5: # escape in ' + curtoken += _escape_char(c) + state = 2 + elif state == 6: # comment + curtoken += c + if state == 3: yield ("id", curtoken) + elif state == 6: yield ("comment", curtoken) + +def grep_full_py_identifiers(tokens): + tokens = list(tokens) + i = 0 + pykeywords = set(["for","in","while","print","continue","break","if","else","elif","yield","def","class","try","except","import","pass","lambda"]) + while i < len(tokens): + tokentype, token = tokens[i] + i += 1 + if tokentype != "id": continue + while i+1 < len(tokens) and tokens[i] == ("op", ".") and tokens[i+1][0] == "id": + token += "." + tokens[i+1][1] + i += 2 + if token == "": continue + if token in pykeywords: continue + if token[0] in ".0123456789": continue + yield token + + +def output_limit(): + return 300 + +def output(s): + limit = output_limit() + if len(s) > limit: + s = s[:limit - 3] + "..." + sys.stderr.write(s) + sys.stderr.write("\n") + sys.stderr.flush() + +def debug_shell(user_ns, user_global_ns): + from IPython.Shell import IPShellEmbed,IPShell + ipshell = IPShell(argv=[], user_ns=user_ns, user_global_ns=user_global_ns) + #ipshell() + ipshell.mainloop() + + +def better_exchook(etype, value, tb): + output("EXCEPTION") + output('Traceback (most recent call last):') + topFrameLocals,topFrameGlobals = None,None + try: + import linecache + limit = None + if hasattr(sys, 'tracebacklimit'): + limit = sys.tracebacklimit + n = 0 + _tb = tb + def _resolveIdentifier(namespace, id): + obj = namespace[id[0]] + for part in id[1:]: + obj = getattr(obj, part) + return obj + def _trySet(old, func): + if old is not None: return old + try: return func() + except: return old + while _tb is not None and (limit is None or n < limit): + f = _tb.tb_frame + topFrameLocals,topFrameGlobals = f.f_locals,f.f_globals + lineno = _tb.tb_lineno + co = f.f_code + filename = co.co_filename + name = co.co_name + output(' File "%s", line %d, in %s' % (filename,lineno,name)) + linecache.checkcache(filename) + line = linecache.getline(filename, lineno, f.f_globals) + if line: + line = line.strip() + output(' line: ' + line) + output(' locals:') + alreadyPrintedLocals = set() + for tokenstr in grep_full_py_identifiers(parse_py_statement(line)): + splittedtoken = tuple(tokenstr.split(".")) + for token in map(lambda i: splittedtoken[0:i], range(1, len(splittedtoken) + 1)): + if token in alreadyPrintedLocals: continue + tokenvalue = None + tokenvalue = _trySet(tokenvalue, lambda: " " + repr(_resolveIdentifier(f.f_locals, token))) + tokenvalue = _trySet(tokenvalue, lambda: " " + repr(_resolveIdentifier(f.f_globals, token))) + tokenvalue = _trySet(tokenvalue, lambda: " " + repr(_resolveIdentifier(f.f_builtins, token))) + tokenvalue = tokenvalue or "" + output(' ' + ".".join(token) + " = " + tokenvalue) + alreadyPrintedLocals.add(token) + if len(alreadyPrintedLocals) == 0: output(" no locals") + _tb = _tb.tb_next + n += 1 + + except Exception, e: + output("ERROR: cannot get more detailed exception info because:") + import traceback + for l in traceback.format_exc().split("\n"): output(" " + l) + output("simple traceback:") + traceback.print_tb(tb) + + import types + def _some_str(value): + try: return str(value) + except: return '' % type(value).__name__ + def _format_final_exc_line(etype, value): + valuestr = _some_str(value) + if value is None or not valuestr: + line = "%s" % etype + else: + line = "%s: %s" % (etype, valuestr) + return line + if (isinstance(etype, BaseException) or + isinstance(etype, types.InstanceType) or + etype is None or type(etype) is str): + output(_format_final_exc_line(etype, value)) + else: + output(_format_final_exc_line(etype.__name__, value)) + + debug = False + try: + import os + debug = int(os.environ["DEBUG"]) != 0 + except: pass + if debug: + output("---------- DEBUG SHELL -----------") + debug_shell(user_ns=topFrameLocals, user_global_ns=topFrameGlobals) + +def install(): + sys.excepthook = better_exchook + +if __name__ == "__main__": + # some examples + # this code produces this output: https://gist.github.com/922622 + + try: + x = {1:2, "a":"b"} + def f(): + y = "foo" + x, 42, sys.stdin.__class__, sys.exc_info, y, z + f() + except: + better_exchook(*sys.exc_info()) + + try: + f = lambda x: None + f(x, y) + except: + better_exchook(*sys.exc_info()) + + # use this to overwrite the global exception handler + sys.excepthook = better_exchook + # and fail + finalfail(sys) -- cgit v1.2.1 From 2ea9ca847bfbb5a5476f89b66f90e68e6e6f5033 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 16:00:36 +0200 Subject: small cleanup --- issues.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/issues.py b/issues.py index 4b275f5..40270b7 100755 --- a/issues.py +++ b/issues.py @@ -20,9 +20,7 @@ print 'Parsing XML export...' soup = BeautifulStoneSoup(open(xml_file_name, 'r'), convertEntities=BeautifulStoneSoup.ALL_ENTITIES) trackers = soup.document.find('trackers', recursive=False).findAll('tracker', recursive=False) -if len(trackers) > 1: - print 'Multiple trackers not yet supported, sorry' - sys.exit(1) +assert len(trackers) == 1, 'Multiple trackers not yet supported, sorry' tracker = trackers[0] from urllib import urlencode -- cgit v1.2.1 From b476e122a2e69f8430e8a751af73f46c4b45c7d3 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 16:00:53 +0200 Subject: use better_exchook --- issues.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/issues.py b/issues.py index 40270b7..8eea844 100755 --- a/issues.py +++ b/issues.py @@ -1,5 +1,8 @@ #!/usr/bin/env python +import better_exchook +better_exchook.install() + import sys import optparse -- cgit v1.2.1 From 31a75db5d9fd937381740320fb130a8df498cabb Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 16:01:06 +0200 Subject: gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc -- cgit v1.2.1 From 7c01164769c958736b4467fb8d4382ff72b90e50 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 16:46:46 +0200 Subject: support for multiple trackers. also some more output and some safty user verify questions --- issues.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 111 insertions(+), 15 deletions(-) diff --git a/issues.py b/issues.py index 8eea844..e7353b8 100755 --- a/issues.py +++ b/issues.py @@ -23,8 +23,6 @@ print 'Parsing XML export...' soup = BeautifulStoneSoup(open(xml_file_name, 'r'), convertEntities=BeautifulStoneSoup.ALL_ENTITIES) trackers = soup.document.find('trackers', recursive=False).findAll('tracker', recursive=False) -assert len(trackers) == 1, 'Multiple trackers not yet supported, sorry' -tracker = trackers[0] from urllib import urlencode from urllib2 import Request, urlopen @@ -33,9 +31,8 @@ from time import sleep from getpass import getpass import re -github_password = getpass('%s\'s GitHub password: ' % github_user) - def rest_call(before, after, data_dict=None): + global github_user, github_password url = 'https://github.com/api/v2/xml/%s/%s/%s' % (before, github_repo, after) if data_dict is None: data = None @@ -53,29 +50,39 @@ def rest_call(before, after, data_dict=None): def labelify(string): return re.sub(r'[^a-z0-9._-]+', '-', string.lower()) -closed_status_ids = [] -for status in tracker.statuses('status', recursive=False): - status_id = status.id.string - status_name = status.nameTag.string - if status_name in ['Closed', 'Deleted']: - closed_status_ids.append(status_id) +closed_status_ids = set() +for tracker in trackers: + for status in tracker.statuses('status', recursive=False): + status_id = status.id.string + status_name = status.nameTag.string + if status_name in ['Closed', 'Deleted']: + closed_status_ids.add(status_id) +print "closed_status_ids:", closed_status_ids groups = {} -for group in tracker.groups('group', recursive=False): - groups[group.id.string] = group.group_name.string +for tracker in trackers: + for group in tracker.groups('group', recursive=False): + groups[group.id.string] = group.group_name.string +print "groups:", groups categories = {} for category in tracker.categories('category', recursive=False): categories[category.id.string] = category.category_name.string +print "categories:", categories started = opts.start_id is None -for item in tracker.tracker_items('tracker_item', recursive=False): +def handle_tracker_item(item, issue_title_prefix): + global started if not started: if item.id.string == opts.start_id: started = True else: - continue - title = item.summary.string + return + + if len(issue_title_prefix) > 0: + issue_title_prefix = issue_title_prefix.strip() + " " + + title = issue_title_prefix + item.summary.string body = '\n\n'.join([ 'Converted from [SourceForge issue %s](%s), submitted by %s' % (item.id.string, item.url.string, item.submitter.string), item.details.string, @@ -112,3 +119,92 @@ for item in tracker.tracker_items('tracker_item', recursive=False): print 'Closing...' rest_call('issues/close', number) + +import signal +def signal_handler(signal, frame): + print 'You pressed Ctrl+C!' + import sys + sys.exit(0) +signal.signal(signal.SIGINT, signal_handler) + +import readline +readline.parse_and_bind("tab: complete") +readline.parse_and_bind("set show-all-if-ambiguous on") + +class Completer: + def __init__(self, words): + self.words = words + self.prefix = None + + def complete(self, prefix, index): + if prefix != self.prefix: + self.matching_words = [w for w in self.words if w.startswith(prefix)] + self.prefix = prefix + else: + pass + try: + return self.matching_words[index] + except IndexError: + return None + +def userRawInput(prompt): + readline.set_completer(None) + s = raw_input(prompt) + return s + +def userInput(words, prompt=""): + readline.set_completer(Completer(words).complete) + while True: + s = raw_input((prompt + " ").lstrip() + "Choice of [" + ", ".join(words) + "] ? ") + if s in words: return s + print "Error: '" + s + "' unknown, please try again" + +def userVerify(txt, abortOnFail=True): + if userInput(["yes","no"], txt) != 'yes': + if abortOnFail: + print "Aborted." + sys.exit(1) + return False + return True + +def getIssueTitlePrefix(trackername): + prefixes = { + "Bug": "", + "Feature Request": "[Feature]", + "Patch": "[Patch]", + "Tech Support": "[Support]" + } + if trackername in prefixes: + return prefixes[trackername] + + prefix = "[" + trackername + "]" + if not userVerify("Tracker '" + trackername + "' is unknown," + + "I would use the prefix '" + prefix + "', ok?", False): + + while True: + prefix = userRawInput("Please enter a prefix: ") + if userVerify("Is prefix '" + prefix + "' ok?"): + break + return prefix + +items = [] +for tracker in trackers: + trackeritems = tracker.tracker_items('tracker_item', recursive=False) + trackername = tracker.description.string + print "Found tracker:", trackername, ",", len(trackeritems), "items" + trackername = trackername.replace("Tracking System", "") + trackername = trackername.strip() + + issue_title_prefix = None + for item in trackeritems: + if issue_title_prefix is None: + issue_title_prefix = getIssueTitlePrefix(trackername) + items.append((item, issue_title_prefix)) + +print "Found", len(items), "items in", len(trackers), "trackers." + +userVerify("Everything ok, should I really start?") +github_password = getpass('%s\'s GitHub password: ' % github_user) +for item in items: + handle_tracker_item(item) + -- cgit v1.2.1 From 441437721ff75cb0634c15b1fd6b482a6a1153e9 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 13 May 2011 16:53:17 +0200 Subject: small fix --- issues.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/issues.py b/issues.py index e7353b8..fdc2300 100755 --- a/issues.py +++ b/issues.py @@ -205,6 +205,6 @@ print "Found", len(items), "items in", len(trackers), "trackers." userVerify("Everything ok, should I really start?") github_password = getpass('%s\'s GitHub password: ' % github_user) -for item in items: - handle_tracker_item(item) +for item, issue_title_prefix in items: + handle_tracker_item(item, issue_title_prefix) -- cgit v1.2.1