This is an automated email from the ASF dual-hosted git repository. brondsem pushed a commit to branch db/8539 in repository https://gitbox.apache.org/repos/asf/allura.git
commit 2dc7b9fbdf79cae0cbd43fbc72a169724997ca47 Author: Dave Brondsema <dbronds...@slashdotmedia.com> AuthorDate: Wed Mar 20 16:20:08 2024 -0400 [#8539] remove old teamforge import script, which had lots of hardcoded specifics --- .gitignore | 1 - .pre-commit-config.yaml | 1 - Allura/docs/getting_started/administration.rst | 11 - scripts/teamforge-import.py | 1126 ------------------------ 4 files changed, 1139 deletions(-) diff --git a/.gitignore b/.gitignore index c620e794e..97d6ae99e 100644 --- a/.gitignore +++ b/.gitignore @@ -34,7 +34,6 @@ Allura/allura/templates/home Allura/allura/templates/var Allura/production.ini Allura/forced_upgrade.ini -scripts/teamforge-export/ /node_modules report.clonedigger .ropeproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d3eea1f2..bfda2f671 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,6 @@ repos: (?x)^( Allura/allura/eventslistener.py| Allura/allura/lib/.*| - scripts/teamforge-import.py )$ diff --git a/Allura/docs/getting_started/administration.rst b/Allura/docs/getting_started/administration.rst index 03b413f21..5d9bff267 100644 --- a/Allura/docs/getting_started/administration.rst +++ b/Allura/docs/getting_started/administration.rst @@ -314,17 +314,6 @@ scrub-allura-data.py :prog: paster script development.ini ../scripts/scrub-allura-data.py -- -teamforge-import.py -------------------- - -*Cannot currently be run as a background task.* - -Extract data from a TeamForge site (via its web API), and import directly into Allura. There are some hard-coded -and extra functions in this script, which should be removed or updated before being used again. -Requires running: :command:`pip install suds` first. :: - - usage: paster script development.ini ../scripts/teamforge-import.py -- --help - .. 
_site-notifications: Site Notifications diff --git a/scripts/teamforge-import.py b/scripts/teamforge-import.py deleted file mode 100644 index 15b669d38..000000000 --- a/scripts/teamforge-import.py +++ /dev/null @@ -1,1126 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import logging -from ast import literal_eval -from getpass import getpass -from optparse import OptionParser -from tg import tmpl_context as c -import re -import os -from time import mktime -import time -import json -from urllib.parse import urlparse -import six.moves.urllib.request -import six.moves.urllib.parse -import six.moves.urllib.error -from http.cookiejar import CookieJar -from datetime import datetime -from configparser import ConfigParser -import random -import string - -import sqlalchemy -from suds.client import Client -from ming.odm.odmsession import ThreadLocalODMSession -from ming.base import Object - -from allura import model as M -from allura.lib import helpers as h -from allura.lib import utils -import six - -log = logging.getLogger('teamforge-import') - -''' - -http://help.collab.net/index.jsp?topic=/teamforge520/reference/api-services.html - -http://www.open.collab.net/nonav/community/cif/csfe/50/javadoc/index.html?com/collabnet/ce/soap50/webservices/page/package-summary.html - -''' - -options = None -s = None # security token -client = None # main api client -users = {} - -cj = CookieJar() -loggedInOpener = six.moves.urllib.request.build_opener(six.moves.urllib.request.HTTPCookieProcessor(cj)) - - -def make_client(api_url, app): - return Client(api_url + app + '?wsdl', location=api_url + app) - - -def main(): - global options, s, client, users - defaults = dict( - api_url=None, - attachment_url='/sf/%s/do/%s/', - default_wiki_text='PRODUCT NAME HERE', - username=None, - password=None, - output_dir='teamforge-export/', - list_project_ids=False, - neighborhood=None, - neighborhood_shortname=None, - use_thread_import_id_when_reloading=False, - skip_wiki=False, - skip_frs_download=False, - skip_unsupported_check=False) - optparser = get_parser(defaults) - options, project_ids = optparser.parse_args() - if options.config_file: - config = ConfigParser() - config.read(options.config_file) - defaults.update( - (k, literal_eval(v)) for k, v in 
config.items('teamforge-import')) - optparser = get_parser(defaults) - options, project_ids = optparser.parse_args() - - # neither specified, so do both - if not options.extract and not options.load: - options.extract = True - options.load = True - - if options.extract: - client = make_client(options.api_url, 'CollabNet') - api_v = client.service.getApiVersion() - if not api_v.startswith('5.4.'): - log.warning('Unexpected API Version %s. May not work correctly.' % - api_v) - - s = client.service.login( - options.username, options.password or getpass('Password: ')) - teamforge_v = client.service.getVersion(s) - if not teamforge_v.startswith('5.4.'): - log.warning( - 'Unexpected TeamForge Version %s. May not work correctly.' % - teamforge_v) - - if options.load: - if not options.neighborhood: - log.error('You must specify a neighborhood when loading') - return - try: - nbhd = M.Neighborhood.query.get(name=options.neighborhood) - except Exception: - log.exception('error querying mongo') - log.error( - 'This should be run as "paster script production.ini ../scripts/teamforge-import.py -- ...options.."') - return - assert nbhd - - if not project_ids: - if not options.extract: - log.error('You must specify project ids') - return - projects = client.service.getProjectList(s) - project_ids = [p.id for p in projects.dataRows] - - if options.list_project_ids: - print(' '.join(project_ids)) - return - - if not os.path.exists(options.output_dir): - os.makedirs(options.output_dir) - for pid in project_ids: - if options.extract: - try: - project = client.service.getProjectData(s, pid) - log.info('Project: %s %s %s' % - (project.id, project.title, project.path)) - out_dir = os.path.join(options.output_dir, project.id) - if not os.path.exists(out_dir): - os.mkdir(out_dir) - - get_project(project) - get_files(project) - if not options.skip_wiki: - get_homepage_wiki(project) - get_discussion(project) - get_news(project) - if not options.skip_unsupported_check: - 
check_unsupported_tools(project) - with open(os.path.join(options.output_dir, 'users.json'), 'w', encoding='utf-8') as user_file: - json.dump(users, user_file, default=str) - except Exception: - log.exception('Error extracting %s' % pid) - - if options.load: - try: - project = create_project(pid, nbhd) - except Exception: - log.exception('Error creating %s' % pid) - - -def load_users(): - ''' load the users data from file, if it hasn't been already ''' - global users - user_filename = os.path.join(options.output_dir, 'users.json') - if not users and os.path.exists(user_filename): - with open(user_filename) as user_file: - # Object for attribute access - users = json.load(user_file, object_hook=Object) - - -def save_user(usernames): - if isinstance(usernames, str): - usernames = [usernames] - - load_users() - - for username in usernames: - if username not in users: - user_data = client.service.getUserData(s, username) - users[username] = Object(user_data) - if users[username].status != 'Active': - log.warning('user: %s status: %s' % - (username, users[username].status)) - - -def get_project(project): - global client - cats = make_client(options.api_url, 'CategorizationApp') - - data = client.service.getProjectData(s, project.id) - access_level = {1: 'public', 4: 'private', 3: 'gated community'}[ - client.service.getProjectAccessLevel(s, project.id) - ] - admins = client.service.listProjectAdmins(s, project.id).dataRows - members = client.service.getProjectMemberList(s, project.id).dataRows - groups = client.service.getProjectGroupList(s, project.id).dataRows - categories = cats.service.getProjectCategories(s, project.id).dataRows - save(json.dumps(dict( - data=dict(data), - access_level=access_level, - admins=list(map(dict, admins)), - members=list(map(dict, members)), - groups=list(map(dict, groups)), - categories=list(map(dict, categories)), - ), default=str), - project, project.id + '.json') - - if len(groups): - log.warning('Project has groups %s' % groups) - 
for u in admins: - if not u.status != 'active': - log.warning('inactive admin %s' % u) - if u.superUser: - log.warning('super user admin %s' % u) - - save_user(data.createdBy) - save_user(u.userName for u in admins) - save_user(u.userName for u in members) - - -def get_user(orig_username): - 'returns an allura User object' - sf_username = make_valid_sf_username(orig_username) - - u = M.User.by_username(sf_username) - - if not u: - load_users() - user = users[orig_username] - if user.status != 'Active': - log.warning(f'Inactive user {orig_username} {user.status}') - - if not 3 <= len(user.fullName) <= 32: - raise Exception('invalid fullName length: %s' % user.fullName) - if '@' not in user.email: - raise Exception('invalid email: %s' % user.email) - # FIXME: hardcoded SFX integration - from sfx.model import tables as T - nu = T.users.insert() - nu.execute(user_name=sf_username.encode('utf-8'), - email=user.email.lower().encode('utf-8'), - realname=user.fullName.encode('utf-8'), - status='A' if user.status == 'Active' else 'D', - language=275, # english trove id - timezone=user.timeZone, - user_pw=''.join(random.sample(string.printable, 32)), - unix_pw=''.join(random.sample(string.printable, 32)), - user_pw_modtime=int(time.time()), - mail_siteupdates=0, - add_date=int(time.time()), - ) - user_id = sqlalchemy.select( - [T.users.c.user_id], T.users.c.user_name == sf_username).execute().fetchone().user_id - npref = T.user_preferences.insert() - npref.execute(user_id=user_id, preference_name='country', - preference_value='US') - npref.execute(user_id=user_id, - preference_name='opt_research', preference_value=0) - npref.execute(user_id=user_id, - preference_name='opt_thirdparty', preference_value=0) - - new_audit = T.audit_trail_user.insert() - new_audit.execute( - date=int(time.time()), - username='nobody', - ip_address='(imported)', - operation_resource=user_id, - operation='%s user account created by TeamForge import script' % user.status, - operation_target='', - ) 
- - u = M.User.by_username(sf_username) - assert u - return u - - -def convert_project_shortname(teamforge_path): - 'convert from TeamForge to SF, and validate early' - tf_shortname = teamforge_path.split('.')[-1] - sf_shortname = tf_shortname.replace('_', '-') - - # FIXME hardcoded translations - sf_shortname = { - 'i1': 'motorola-i1', - 'i9': 'motorola-i9', - 'devplatformforocap': 'ocap-dev-pltfrm', - 'sitewide': '--init--', - }.get(sf_shortname, sf_shortname) - - if not 3 <= len(sf_shortname) <= 15: - raise ValueError( - 'Project name length must be between 3 & 15, inclusive: %s (%s)' % - (sf_shortname, len(sf_shortname))) - return sf_shortname - - -# FIXME hardcoded -skip_perms_usernames = { - 'username1', 'username2', 'username3' -} - - -def create_project(pid, nbhd): - M.session.artifact_orm_session._get().skip_mod_date = True - data = loadjson(pid, pid + '.json') - # pprint(data) - log.info(f'Loading: {pid} {data.data.title} {data.data.path}') - shortname = convert_project_shortname(data.data.path) - - project = M.Project.query.get( - shortname=shortname, neighborhood_id=nbhd._id) - if not project: - private = (data.access_level == 'private') - log.debug(f'Creating {shortname} private={private}') - one_admin = [ - u.userName for u in data.admins if u.status == 'Active'][0] - project = nbhd.register_project(shortname, - get_user(one_admin), - project_name=data.data.title, - private_project=private) - project.notifications_disabled = True - project.short_description = data.data.description - project.last_updated = datetime.strptime( - data.data.lastModifiedDate, '%Y-%m-%d %H:%M:%S') - M.main_orm_session.flush(project) - # TODO: push last_updated to gutenberg? - # TODO: try to set createdDate? 
- - role_admin = M.ProjectRole.by_name('Admin', project) - admin_usernames = set() - for admin in data.admins: - # FIXME: skip non-active users - if admin.userName in skip_perms_usernames: - continue - admin_usernames.add(admin.userName) - user = get_user(admin.userName) - c.user = user - pr = M.ProjectRole.by_user(user, project=project, upsert=True) - pr.roles = [role_admin._id] - ThreadLocalODMSession.flush_all() - role_developer = M.ProjectRole.by_name('Developer', project) - for member in data.members: - # FIXME: skip non-active users - if member.userName in skip_perms_usernames: - continue - if member.userName in admin_usernames: - continue - user = get_user(member.userName) - pr = M.ProjectRole.by_user(user, project=project, upsert=True) - pr.roles = [role_developer._id] - ThreadLocalODMSession.flush_all() - project.labels = [cat.path.split('projects/categorization.root.')[1] - for cat in data.categories] - icon_file = 'emsignia-MOBILITY-red.png' - if 'nsn' in project.labels or 'msi' in project.labels: - icon_file = 'emsignia-SOLUTIONS-blue.gif' - if project.icon: - M.ProjectFile.remove(dict(project_id=project._id, category='icon')) - with open(os.path.join('..', 'scripts', icon_file), 'rb') as fp: - M.ProjectFile.save_image( - icon_file, fp, content_type=utils.guess_mime_type(icon_file), - square=True, thumbnail_size=(48, 48), - thumbnail_meta=dict(project_id=project._id, category='icon')) - ThreadLocalODMSession.flush_all() - - dirs = os.listdir(os.path.join(options.output_dir, pid)) - - frs_mapping = loadjson(pid, 'frs_mapping.json') - - if not options.skip_wiki and 'wiki' in dirs: - import_wiki(project, pid, nbhd) - if not options.skip_frs_download and not project.app_instance('downloads'): - project.install_app('Downloads', 'downloads') - if 'forum' in dirs: - import_discussion(project, pid, frs_mapping, shortname, nbhd) - if 'news' in dirs: - import_news(project, pid, frs_mapping, shortname, nbhd) - - project.notifications_disabled = False - 
ThreadLocalODMSession.flush_all() - return project - - -def import_wiki(project, pid, nbhd): - from forgewiki import model as WM - - def upload_attachments(page, pid, beginning): - dirpath = os.path.join(options.output_dir, pid, 'wiki', beginning) - if not os.path.exists(dirpath): - return - files = os.listdir(dirpath) - for f in files: - with open(os.path.join(options.output_dir, pid, 'wiki', beginning, f)) as fp: - page.attach(f, fp, content_type=utils.guess_mime_type(f)) - pages = os.listdir(os.path.join(options.output_dir, pid, 'wiki')) - # handle the homepage content - if 'homepage_text.markdown' in pages: - home_app = project.app_instance('home') - h.set_context(project.shortname, 'home', neighborhood=nbhd) - # set permissions and config options - role_admin = M.ProjectRole.by_name('Admin')._id - role_anon = M.ProjectRole.by_name('*anonymous')._id - home_app.config.options['show_discussion'] = False - home_app.config.options['show_left_bar'] = False - home_app.config.options['show_right_bar'] = False - home_app.config.acl = [ - M.ACE.allow(role_anon, 'read'), - M.ACE.allow(role_admin, 'create'), - M.ACE.allow(role_admin, 'edit'), - M.ACE.allow(role_admin, 'delete'), - M.ACE.allow(role_admin, 'moderate'), - M.ACE.allow(role_admin, 'configure'), - M.ACE.allow(role_admin, 'admin')] - p = WM.Page.upsert('Home') - p.text = wiki2markdown(load(pid, 'wiki', 'homepage_text.markdown')) - upload_attachments(p, pid, 'homepage') - if 'HomePage.json' in pages and 'HomePage.markdown' in pages: - wiki_app = project.app_instance('wiki') - if not wiki_app: - wiki_app = project.install_app('Wiki', 'wiki') - h.set_context(project.shortname, 'wiki', neighborhood=nbhd) - # set permissions and config options - role_admin = M.ProjectRole.by_name('Admin')._id - role_anon = M.ProjectRole.by_name('*anonymous')._id - wiki_app.config.options['show_discussion'] = False - wiki_app.config.options['show_left_bar'] = False - wiki_app.config.options['show_right_bar'] = False - 
wiki_app.config.acl = [ - M.ACE.allow(role_anon, 'read'), - M.ACE.allow(role_admin, 'create'), - M.ACE.allow(role_admin, 'edit'), - M.ACE.allow(role_admin, 'delete'), - M.ACE.allow(role_admin, 'moderate'), - M.ACE.allow(role_admin, 'configure'), - M.ACE.allow(role_admin, 'admin')] - # make all the wiki pages - for page in pages: - ending = page[-5:] - beginning = page[:-5] - markdown_file = '%s.markdown' % beginning - if '.json' == ending and markdown_file in pages: - page_data = loadjson(pid, 'wiki', page) - content = load(pid, 'wiki', markdown_file) - if page == 'HomePage.json': - globals = WM.Globals.query.get( - app_config_id=wiki_app.config._id) - if globals is not None: - globals.root = page_data.title - else: - globals = WM.Globals( - app_config_id=wiki_app.config._id, root=page_data.title) - p = WM.Page.upsert(page_data.title) - p.text = wiki2markdown(content) - # upload attachments - upload_attachments(p, pid, beginning) - if not p.history().first(): - p.commit() - ThreadLocalODMSession.flush_all() - - -def import_discussion(project, pid, frs_mapping, sf_project_shortname, nbhd): - from forgediscussion import model as DM - discuss_app = project.app_instance('discussion') - if not discuss_app: - discuss_app = project.install_app('Discussion', 'discussion') - h.set_context(project.shortname, 'discussion', neighborhood=nbhd) - assert c.app - # set permissions and config options - role_admin = M.ProjectRole.by_name('Admin')._id - role_developer = M.ProjectRole.by_name('Developer')._id - role_auth = M.ProjectRole.by_name('*authenticated')._id - role_anon = M.ProjectRole.by_name('*anonymous')._id - discuss_app.config.acl = [ - M.ACE.allow(role_anon, 'read'), - M.ACE.allow(role_auth, 'post'), - M.ACE.allow(role_auth, 'unmoderated_post'), - M.ACE.allow(role_developer, 'moderate'), - M.ACE.allow(role_admin, 'configure'), - M.ACE.allow(role_admin, 'admin')] - ThreadLocalODMSession.flush_all() - DM.Forum.query.remove( - dict(app_config_id=discuss_app.config._id, 
shortname='general')) - forums = os.listdir(os.path.join(options.output_dir, pid, 'forum')) - for forum in forums: - ending = forum[-5:] - forum_name = forum[:-5] - if '.json' == ending and forum_name in forums: - forum_data = loadjson(pid, 'forum', forum) - fo = DM.Forum.query.get( - shortname=forum_name, app_config_id=discuss_app.config._id) - if not fo: - fo = DM.Forum(app_config_id=discuss_app.config._id, - shortname=forum_name) - fo.name = forum_data.title - fo.description = forum_data.description - fo_num_topics = 0 - fo_num_posts = 0 - topics = os.listdir(os.path.join(options.output_dir, pid, 'forum', - forum_name)) - for topic in topics: - ending = topic[-5:] - topic_name = topic[:-5] - if '.json' == ending and topic_name in topics: - fo_num_topics += 1 - topic_data = loadjson(pid, 'forum', forum_name, topic) - thread_query = dict( - subject=topic_data.title, - discussion_id=fo._id, - app_config_id=discuss_app.config._id) - if not options.skip_thread_import_id_when_reloading: - # temporary/transitional. 
Just needed the first time - # running with this new code against an existing import - # that didn't have import_ids - thread_query['import_id'] = topic_data.id - to = DM.ForumThread.query.get(**thread_query) - if not to: - to = DM.ForumThread.new( - subject=topic_data.title, - discussion_id=fo._id, - import_id=topic_data.id, - app_config_id=discuss_app.config._id) - to.import_id = topic_data.id - to_num_replies = 0 - oldest_post = None - newest_post = None - posts = sorted( - os.listdir(os.path.join(options.output_dir, pid, 'forum', forum_name, topic_name))) - for post in posts: - ending = post[-5:] - post_name = post[:-5] - if '.json' == ending: - to_num_replies += 1 - post_data = loadjson(pid, 'forum', - forum_name, topic_name, post) - p = DM.ForumPost.query.get( - _id='{}{}@import'.format( - post_name, str(discuss_app.config._id)), - thread_id=to._id, - discussion_id=fo._id, - app_config_id=discuss_app.config._id) - - if not p: - p = DM.ForumPost( - _id='{}{}@import'.format( - post_name, str( - discuss_app.config._id)), - thread_id=to._id, - discussion_id=fo._id, - app_config_id=discuss_app.config._id) - create_date = datetime.strptime( - post_data.createdDate, '%Y-%m-%d %H:%M:%S') - p.timestamp = create_date - p.author_id = str( - get_user(post_data.createdByUserName)._id) - p.text = convert_post_content( - frs_mapping, sf_project_shortname, post_data.content, nbhd) - p.status = 'ok' - if post_data.replyToId: - p.parent_id = '{}{}@import'.format( - post_data.replyToId, str(discuss_app.config._id)) - slug, full_slug = p.make_slugs( - parent=p.parent, timestamp=create_date) - p.slug = slug - p.full_slug = full_slug - if oldest_post is None or oldest_post.timestamp > create_date: - oldest_post = p - if newest_post is None or newest_post.timestamp < create_date: - newest_post = p - ThreadLocalODMSession.flush_all() - to.num_replies = to_num_replies - to.first_post_id = oldest_post._id - to.last_post_date = newest_post.timestamp - to.mod_date = 
newest_post.timestamp - fo_num_posts += to_num_replies - fo.num_topics = fo_num_topics - fo.num_posts = fo_num_posts - ThreadLocalODMSession.flush_all() - - -def import_news(project, pid, frs_mapping, sf_project_shortname, nbhd): - from forgeblog import model as BM - posts = os.listdir(os.path.join(options.output_dir, pid, 'news')) - if len(posts): - news_app = project.app_instance('news') - if not news_app: - news_app = project.install_app('blog', 'news', mount_label='News') - h.set_context(project.shortname, 'news', neighborhood=nbhd) - # make all the blog posts - for post in posts: - if '.json' == post[-5:]: - post_data = loadjson(pid, 'news', post) - create_date = datetime.strptime( - post_data.createdOn, '%Y-%m-%d %H:%M:%S') - p = BM.BlogPost.query.get(title=post_data.title, - timestamp=create_date, - app_config_id=news_app.config._id) - if not p: - p = BM.BlogPost(title=post_data.title, - timestamp=create_date, - app_config_id=news_app.config._id) - p.text = convert_post_content( - frs_mapping, sf_project_shortname, post_data.body, nbhd) - p.mod_date = create_date - p.state = 'published' - if not p.slug: - p.make_slug() - if not p.history().first(): - p.commit() - ThreadLocalODMSession.flush_all() - M.Thread.new(discussion_id=p.app_config.discussion_id, - ref_id=p.index_id(), - subject='%s discussion' % p.title) - user = get_user(post_data.createdByUsername) - p.history().first().author = dict( - id=user._id, - username=user.username, - display_name=user.get_pref('display_name')) - ThreadLocalODMSession.flush_all() - - -def check_unsupported_tools(project): - docs = make_client(options.api_url, 'DocumentApp') - doc_count = 0 - for doc in docs.service.getDocumentFolderList(s, project.id, recursive=True).dataRows: - if doc.title == 'Root Folder': - continue - doc_count += 1 - if doc_count: - log.warning('Migrating documents is not supported, but found %s docs' % - doc_count) - - scm = make_client(options.api_url, 'ScmApp') - for repo in 
scm.service.getRepositoryList(s, project.id).dataRows: - log.warning('Migrating SCM repos is not supported, but found %s' % - repo.repositoryPath) - - tasks = make_client(options.api_url, 'TaskApp') - task_count = len( - tasks.service.getTaskList(s, project.id, filters=None).dataRows) - if task_count: - log.warning('Migrating tasks is not supported, but found %s tasks' % - task_count) - - tracker = make_client(options.api_url, 'TrackerApp') - tracker_count = len( - tracker.service.getArtifactList(s, project.id, filters=None).dataRows) - if tracker_count: - log.warning( - 'Migrating trackers is not supported, but found %s tracker artifacts' % - task_count) - - -def load(project_id, *paths): - in_file = os.path.join(options.output_dir, project_id, *paths) - with open(in_file, encoding='utf-8') as input: - content = input.read() - return content - - -def loadjson(*args): - # Object for attribute access - return json.loads(load(*args), object_hook=Object) - - -def save(content, project, *paths): - out_file = os.path.join(options.output_dir, project.id, *paths) - if not os.path.exists(os.path.dirname(out_file)): - os.makedirs(os.path.dirname(out_file)) - with open(out_file, 'w', encoding='utf-8') as out: - out.write(content.encode('utf-8')) - - -def download_file(tool, url_path, *filepaths): - if tool == 'wiki': - action = 'viewAttachment' - elif tool == 'frs': - action = 'downloadFile' - else: - raise ValueError('tool %s not supported' % tool) - action_url = options.attachment_url % (tool, action) - - out_file = os.path.join(options.output_dir, *filepaths) - if not os.path.exists(os.path.dirname(out_file)): - os.makedirs(os.path.dirname(out_file)) - - if '://' in url_path: - url = url_path - else: - hostname = urlparse(options.api_url).hostname - scheme = urlparse(options.api_url).scheme - url = scheme + '://' + hostname + action_url + six.moves.urllib.parse.quote(url_path) - log.debug('fetching %s' % url) - - resp = loggedInOpener.open(url) - # if not logged in and 
this is private, you will get an html response instead of the file - # log in to make sure the file should really be html - if resp.headers.type == 'text/html': - # log in and save the file - resp = loggedInOpener.open(scheme + '://' + hostname + "/sf/sfmain/do/login", six.moves.urllib.parse.urlencode( - {'username': options.username, 'password': options.password, 'returnToUrl': url, 'sfsubmit': 'submit'})) - with open(out_file, 'w', encoding='utf-8') as out: - out.write(resp.fp.read()) - return out_file - - -bracket_macro = re.compile(r'\[(.*?)\]') -h1 = re.compile(r'^!!!', re.MULTILINE) -h2 = re.compile(r'^!!', re.MULTILINE) -h3 = re.compile(r'^!', re.MULTILINE) -re_stats = re.compile(r'#+ .* [Ss]tatistics\n+(.*\[sf:.*?Statistics\].*)+') - - -def wiki2markdown(markup): - ''' - Partial implementation of http://help.collab.net/index.jsp?topic=/teamforge520/reference/wiki-wikisyntax.html - TODO: __ for bold - TODO: quote filenames with spaces, e.g. [[img src="foo bar.jpg"]] - ''' - def bracket_handler(matchobj): - snippet = matchobj.group(1) - ext = snippet.rsplit('.')[-1].lower() - # TODO: support [foo|bar.jpg] - if snippet.startswith('sf:'): - # can't handle these macros - return matchobj.group(0) - elif ext in ('jpg', 'gif', 'png'): - filename = snippet.split('/')[-1] - return '[[img src=%s]]' % filename - elif '|' in snippet: - text, link = snippet.split('|', 1) - return f'[{text}]({link})' - else: - # regular link - return '<%s>' % snippet - markup = bracket_macro.sub(bracket_handler, markup or '') - markup = h1.sub('#', markup) - markup = h2.sub('##', markup) - markup = h3.sub('###', markup) - - markup = re_stats.sub('', markup) - return markup - - -re_rel = re.compile(r'\b(rel\d+)\b') - - -def convert_post_content(frs_mapping, sf_project_shortname, text, nbhd): - def rel_handler(matchobj): - relno = matchobj.group(1) - path = frs_mapping.get(relno) - if path: - return '<a href="/projects/{}.{}/files/{}">{}</a>'.format( - sf_project_shortname, 
nbhd.url_prefix.strip('/'), path, path) - else: - return relno - text = re_rel.sub(rel_handler, text or '') - return text - - -def find_image_references(markup): - 'yields filenames' - for matchobj in bracket_macro.finditer(markup): - snippet = matchobj.group(1) - ext = snippet.rsplit('.')[-1].lower() - if ext in ('jpg', 'gif', 'png'): - yield snippet - - -def get_news(project): - ''' - Extracts news posts - ''' - app = make_client(options.api_url, 'NewsApp') - - # find the forums - posts = app.service.getNewsPostList(s, project.id) - for post in posts.dataRows: - save(json.dumps(dict(post), default=str), - project, 'news', post.id + '.json') - save_user(post.createdByUsername) - - -def get_discussion(project): - ''' - Extracts discussion forums and posts - ''' - app = make_client(options.api_url, 'DiscussionApp') - - # find the forums - forums = app.service.getForumList(s, project.id) - for forum in forums.dataRows: - forumname = forum.path.split('.')[-1] - log.info('Retrieving data for forum: %s' % forumname) - save(json.dumps(dict(forum), default=str), project, 'forum', - forumname + '.json') - # topic in this forum - topics = app.service.getTopicList(s, forum.id) - for topic in topics.dataRows: - save(json.dumps(dict(topic), default=str), project, 'forum', - forumname, topic.id + '.json') - # posts in this topic - posts = app.service.getPostList(s, topic.id) - for post in posts.dataRows: - save(json.dumps(dict(post), default=str), project, 'forum', - forumname, topic.id, post.id + '.json') - save_user(post.createdByUserName) - - -def get_homepage_wiki(project): - ''' - Extracts home page and wiki pages - ''' - wiki = make_client(options.api_url, 'WikiApp') - - pages = {} - wiki_pages = wiki.service.getWikiPageList(s, project.id) - for wiki_page in wiki_pages.dataRows: - wiki_page = wiki.service.getWikiPageData(s, wiki_page.id) - pagename = wiki_page.path.split('/')[-1] - save(json.dumps(dict(wiki_page), default=str), - project, 'wiki', pagename + '.json') - if 
def _sql_escape(value):
    """Return ``str(value)`` made safe for use inside a single-quoted SQL literal.

    Single quotes are doubled, which is the SQL-standard way to embed a quote
    in a string constant.
    NOTE(review): backslash handling depends on the target server's settings
    and is not addressed here -- confirm against the PFS database.
    """
    return str(value).replace("'", "''")


def _dir_sql(created_on, project, dir_name, rel_path):
    """Build the SQL ``UPDATE`` that sets ``file_crtime`` on one PFS directory row.

    :param created_on: value written to ``file_crtime`` (``get_files`` passes
        integer seconds obtained from ``mktime``).
    :param project: object whose ``path`` is given to
        ``convert_project_shortname`` to build the ``project_name`` filter.
    :param dir_name: directory name matched against ``pfs_name``.
    :param rel_path: path of the parent directory; when empty the parent is
        the literal ``'1'``, otherwise it is looked up in ``pfs_path``.
    :returns: the SQL statement as a string.
    :raises AssertionError: if ``options.neighborhood_shortname`` is not set.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``;
    # the exception type is kept for backward compatibility.
    if not options.neighborhood_shortname:
        raise AssertionError('--n-shortname is required to generate PFS SQL')

    if not rel_path:
        parent_directory = "'1'"
    else:
        parent_directory = (
            "(SELECT pfs_path FROM pfs_path WHERE path_name = '%s/')"
            % _sql_escape(rel_path))

    # Every interpolated name is escaped: titles come from the remote site and
    # may contain apostrophes, which would otherwise terminate the literal.
    sql = """
    UPDATE pfs
      SET file_crtime = '{}'
      WHERE source_pk = (SELECT project.project FROM project WHERE project.project_name = '{}.{}')
      AND source_table = 'project'
      AND pfs_type = 'd'
      AND pfs_name = '{}'
      AND parent_directory = {};
    """.format(created_on,
               _sql_escape(convert_project_shortname(project.path)),
               _sql_escape(options.neighborhood_shortname),
               _sql_escape(dir_name),
               parent_directory)
    return sql


def get_files(project):
    """Export the file releases of *project* through the ``FrsApp`` client.

    Walks packages -> releases -> files and, for each of them, saves its data
    as JSON via ``save()``.  Unless ``options.skip_frs_download`` is set, each
    file is downloaded below ``<output_dir>/PFS/<project shortname>`` and its
    mtime is set from the file's ``lastModifiedDate``.  Existing release and
    package directories get their mtime set as well, and one ``_dir_sql``
    statement per release and per package is appended to ``pfs_updates.sql``
    in ``options.output_dir``.  Finally the release-id -> path mapping is
    saved as ``frs_mapping.json``.

    ``s`` is a module-level name defined elsewhere in this file (presumably
    the API session token -- confirm).
    """
    frs = make_client(options.api_url, 'FrsApp')
    # A valid name may not start with '.' or ' ', may not end with ' ', and
    # may only contain the listed characters.
    valid_pfs_filename = re.compile(
        r'(?![. ])[-_ +.,=#~@!()\[\]a-zA-Z0-9]+(?<! )$')
    pfs_output_dir = os.path.join(
        os.path.abspath(options.output_dir), 'PFS', convert_project_shortname(project.path))
    # Collected as a list and joined once when written out.
    sql_updates = []

    def handle_path(obj, prev_path):
        """Save *obj* as JSON under its sanitized title and return that path."""
        path_component = obj.title.strip().replace(
            '/', ' ').replace('&', '').replace(':', '')
        path = os.path.join(prev_path, path_component)
        if not valid_pfs_filename.match(path_component):
            # Only reported; the path is still used.
            log.error('Invalid filename: "%s"' % path)
        save(json.dumps(dict(obj), default=str),
             project, 'frs', path + '.json')
        return path

    frs_mapping = {}

    for pkg in frs.service.getPackageList(s, project.id).dataRows:
        pkg_path = handle_path(pkg, '')
        pkg_details = frs.service.getPackageData(s, pkg.id)  # download count
        save(json.dumps(dict(pkg_details), default=str),
             project, 'frs', pkg_path + '_details.json')

        for rel in frs.service.getReleaseList(s, pkg.id).dataRows:
            rel_path = handle_path(rel, pkg_path)
            frs_mapping[rel['id']] = rel_path
            # download count
            rel_details = frs.service.getReleaseData(s, rel.id)
            save(json.dumps(dict(rel_details), default=str),
                 project, 'frs', rel_path + '_details.json')

            for file in frs.service.getFrsFileList(s, rel.id).dataRows:
                details = frs.service.getFrsFileData(s, file.id)

                file_path = handle_path(file, rel_path)
                save(json.dumps(dict(file,
                                     lastModifiedBy=details.lastModifiedBy,
                                     lastModifiedDate=details.lastModifiedDate,
                                     ),
                                default=str),
                     project,
                     'frs',
                     file_path + '.json'
                     )
                if not options.skip_frs_download:
                    download_file('frs', rel.path + '/' + file.id,
                                  pfs_output_dir, file_path)
                    mtime = int(mktime(details.lastModifiedDate.timetuple()))
                    os.utime(os.path.join(pfs_output_dir, file_path),
                             (mtime, mtime))

            # releases
            created_on = int(mktime(rel.createdOn.timetuple()))
            mtime = int(mktime(rel.lastModifiedOn.timetuple()))
            # The directory only exists if something was downloaded into it.
            if os.path.exists(os.path.join(pfs_output_dir, rel_path)):
                os.utime(os.path.join(pfs_output_dir, rel_path),
                         (mtime, mtime))
            # NOTE(review): the raw title is used as the directory name here,
            # while the on-disk path uses the sanitized component from
            # handle_path(); the two differ when the title contains
            # '/', '&' or ':' -- confirm which one PFS stores.
            sql_updates.append(_dir_sql(created_on, project,
                                        rel.title.strip(), pkg_path))
        # packages
        created_on = int(mktime(pkg.createdOn.timetuple()))
        mtime = int(mktime(pkg.lastModifiedOn.timetuple()))
        if os.path.exists(os.path.join(pfs_output_dir, pkg_path)):
            os.utime(os.path.join(pfs_output_dir, pkg_path), (mtime, mtime))
        sql_updates.append(_dir_sql(created_on, project, pkg.title.strip(), ''))
    # save pfs update sql for this project
    with open(os.path.join(options.output_dir, 'pfs_updates.sql'), 'a') as out:
        out.write('/* %s */' % project.id)
        out.write(''.join(sql_updates))
    save(json.dumps(frs_mapping), project, 'frs_mapping.json')
re_username = re.compile(r"^[a-z\-0-9]+$")

# Suffix appended to every username that has no explicit override below.
_SF_USERNAME_SUFFIX = '-mmi'
# Longest username make_valid_sf_username() will return.
_SF_USERNAME_MAX_LEN = 15


def _normalize_username(username):
    """Return *username* lowercased, with underscores replaced by dashes."""
    return username.replace('_', '-').lower()


# FIXME username translation is hardcoded here.
# The keys are normalized exactly like the name being looked up; otherwise
# entries written with capitals or underscores ('MalcolmDwyer', 'david_peyer')
# could never match.
_SF_USERNAME_OVERRIDES = {
    _normalize_username(key): value
    for key, value in dict(
        rlevy='ramilevy',
        mkeisler='mkeisler',
        bthale='bthale',
        mmuller='mattjustmull',
        MalcolmDwyer='slagheap',
        tjyang='tjyang',
        manaic='maniac76',
        srinid='cnudav',
        es='est016',
        david_peyer='david-mmi',
        okruse='ottokruse',
        jvp='jvpmoto',
        dmorelli='dmorelli',
    ).items()
}


def make_valid_sf_username(orig_username):
    """Translate a source-site username into a valid target username.

    The name is lowercased and underscores become dashes.  If the result has
    an entry in the hardcoded override table, that entry is used; otherwise
    ``'-mmi'`` is appended.  A name that still does not match ``re_username``
    has every character outside ``a-z``, ``0-9`` and ``-`` removed, and a name
    longer than 15 characters is cut so that it ends in ``'-mmi'`` and is
    exactly 15 characters long.  Both adjustments are logged as errors.

    :param orig_username: username as it appears on the source site.
    :returns: the translated username.
    """
    normalized = _normalize_username(orig_username)
    sf_username = _SF_USERNAME_OVERRIDES.get(
        normalized, normalized + _SF_USERNAME_SUFFIX)

    if not re_username.match(sf_username):
        # Only suffixed defaults can get here (every override value is already
        # valid), so drop the suffix, clean the rest, and put it back.
        # The filter uses the same ASCII set as re_username; str.isalnum()
        # would let non-ASCII letters and digits through.
        base = sf_username[:-len(_SF_USERNAME_SUFFIX)]
        adjusted_username = re.sub(r'[^a-z0-9-]', '', base) + _SF_USERNAME_SUFFIX
        log.error('invalid sf_username characters: %s Changing it to %s',
                  sf_username, adjusted_username)
        sf_username = adjusted_username
    if len(sf_username) > _SF_USERNAME_MAX_LEN:
        keep = _SF_USERNAME_MAX_LEN - len(_SF_USERNAME_SUFFIX)
        adjusted_username = sf_username[:keep] + _SF_USERNAME_SUFFIX
        log.error('invalid sf_username length: %s Changing it to %s',
                  sf_username, adjusted_username)
        sf_username = adjusted_username
    return sf_username
Project Statistics - -|[sf:frsStatistics]|[sf:artifactStatistics]| - ''' - - new_markup = wiki2markdown(markup) - assert '\n[[img src=myimage.jpg]]\n[[img src=anotherimage.jpg]]\n' in new_markup - assert '\n###this is the first' in new_markup - assert '<http://www.google.com>' in new_markup - assert '[SourceForge ](http://www.sf.net)' in new_markup - assert '\n# Project Statistics' not in new_markup - assert '[sf:frsStatistics]' not in new_markup