Hi - 

I would like to export a large amount of data from ExpressionEngine to WordPress, and have had lots of trouble finding something that isn't miles over my head. I did find these three files (a configuration file and two Python scripts), which seem to be perfect for this purpose, but I don't know how to go about using them. It's a three-step process:

1. Configure Databases.cfg (a settings file that both scripts read; it is not run itself)
2. Run ExpressionEngineExport.py
3. Run WordPressImport.py.

I have a Mac and am all set with access to my databases on my host; all I need is to be pointed in the right direction. Anyone?

Thank you!

Mindy


Attachment: Databases.cfg
Description: Binary data

#!/opt/local/bin/python

import xml.sax.saxutils
from sqlobject import *
from elementtree import ElementTree
import datetime
from ConfigParser import ConfigParser
import os
import sys

### Read the URL from the config file
# Databases.cfg is expected to sit next to this script.  The script path is
# made absolute first: when the script is started as "python Script.py",
# dirname(sys.argv[0]) is '' and plain string concatenation would yield
# '/Databases.cfg' (the filesystem root).
theConfigPath = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'Databases.cfg')

theParser = ConfigParser()
# ConfigParser.read() skips files it cannot open and (Python 2.4+) returns the
# list of files it parsed, so an empty result means the config was not loaded.
if not theParser.read(theConfigPath):
	raise IOError('Could not read config file: %s' % theConfigPath)

# The 'input' option of the [Databases] section is the connection URI of the
# database this script reads from.
theURL = theParser.get('Databases', 'input')

connection = connectionForURI(theURL)

# Make this connection the default one for every SQLObject class below.
sqlhub.processConnection = connection

class Weblog(SQLObject):
	"""Row of the exp_weblogs table, keyed by weblog_id."""

	class sqlmeta:
		idName = 'weblog_id'
		table = 'exp_weblogs'

	# Plain columns; each attribute maps onto the blog_* column named in dbName.
	name = StringCol(dbName='blog_name', length=40)
	title = StringCol(dbName='blog_title', length=100)
	url = StringCol(dbName='blog_url', length=100)

	# One-to-many joins, both matched on the other table's weblog_id column.
	titles = MultipleJoin('Title', joinColumn='weblog_id')
	data = MultipleJoin('Data', joinColumn='weblog_id')

class Title(SQLObject):
	"""Row of the exp_weblog_titles table (one per entry), keyed by entry_id."""

	class sqlmeta:
		idName = 'entry_id'
		table = 'exp_weblog_titles'

	title = StringCol(dbName='title', length=100)
	# Stored as an integer; the export code converts it with
	# datetime.fromtimestamp(), i.e. it is treated as a Unix timestamp.
	entry_date = IntCol()
	slug = StringCol(dbName='url_title', length=75)

	# One-to-many joins, all matched on the other table's entry_id column.
	data = MultipleJoin('Data', joinColumn='entry_id')
	comments = MultipleJoin('Comment', joinColumn='entry_id')
	trackbacks = MultipleJoin('Trackback', joinColumn='entry_id')

	# Many-to-many link to Category through exp_category_posts.
	categories = RelatedJoin(
		'Category',
		intermediateTable='exp_category_posts',
		joinColumn='entry_id',
		otherColumn='cat_id',
	)

class Field(SQLObject):
	"""Row of the exp_weblog_fields table, keyed by field_id."""

	class sqlmeta:
		idName = 'field_id'
		table = 'exp_weblog_fields'

	# 'name' is what the export code uses as the XML element name for a field.
	name = StringCol(dbName='field_name', length=32)
	label = StringCol(dbName='field_label', length=50)

class Data(SQLObject):
	"""Row of the exp_weblog_data table, keyed by entry_id.

	Only the first four field_id_N / field_ft_N column pairs are mapped.
	"""

	class sqlmeta:
		idName = 'entry_id'
		table = 'exp_weblog_data'

	# field_id_N columns: the values the export code writes out per field.
	# field_ft_N columns: mapped but not read by the export code
	# (presumably a per-field format setting -- TODO confirm).
	fieldId1 = StringCol(dbName='field_id_1')
	fieldFt1 = StringCol(dbName='field_ft_1', length=40)

	fieldId2 = StringCol(dbName='field_id_2')
	fieldFt2 = StringCol(dbName='field_ft_2', length=40)

	fieldId3 = StringCol(dbName='field_id_3')
	fieldFt3 = StringCol(dbName='field_ft_3', length=40)

	fieldId4 = StringCol(dbName='field_id_4')
	fieldFt4 = StringCol(dbName='field_ft_4', length=40)

class Comment(SQLObject):
	"""Row of the exp_comments table, keyed by comment_id."""

	class sqlmeta:
		idName = 'comment_id'
		table = 'exp_comments'

	status = StringCol(dbName='status', length=1)

	# Who wrote the comment and where it came from.
	name = StringCol(dbName='name', length=50)
	email = StringCol(dbName='email', length=50)
	url = StringCol(dbName='url', length=75)
	location = StringCol(dbName='location', length=50)
	ip_address = StringCol(dbName='ip_address', length=16)

	# comment_date is an integer (the export code treats it as a Unix
	# timestamp); edit_date is mapped as a datetime column.
	comment_date = IntCol(dbName='comment_date')
	edit_date = DateTimeCol(dbName='edit_date')

	comment = StringCol(dbName='comment')

class Trackback(SQLObject):
	"""Row of the exp_trackbacks table, keyed by trackback_id."""

	class sqlmeta:
		idName = 'trackback_id'
		table = 'exp_trackbacks'

	weblog_id = IntCol(dbName='weblog_id')
	title = StringCol(dbName='title', length=100)
	content = StringCol(dbName='content')
	weblog_name = StringCol(dbName='weblog_name', length=100)
	trackback_url = StringCol(dbName='trackback_url', length=200)
	# Integer column; the export code converts it with datetime.fromtimestamp().
	trackback_date = IntCol(dbName='trackback_date')
	trackback_ip = StringCol(dbName='trackback_ip', length=16)

class Category(SQLObject):
	"""Row of the exp_categories table, keyed by cat_id."""

	class sqlmeta:
		idName = 'cat_id'
		table = 'exp_categories'

	group_id = IntCol(dbName='group_id')
	parent_id = IntCol(dbName='parent_id')
	name = StringCol(dbName='cat_name', length=60)
	image = StringCol(dbName='cat_image', length=120)
	description = StringCol(dbName='cat_description')
	order = IntCol(dbName='cat_order')

	# Many-to-many link back to Title through exp_category_posts
	# (the mirror image of Title.categories).
	entries = RelatedJoin(
		'Title',
		intermediateTable='exp_category_posts',
		joinColumn='cat_id',
		otherColumn='entry_id',
	)


# class CategoryPosts(SQLObject):
# 	class sqlmeta:
# 		table = 'exp_category_posts'	
# 		idName = 'cat_id'
# 
# 
# CREATE TABLE `exp_category_posts` (
#   `entry_id` int(10) unsigned NOT NULL default '0',
#   `cat_id` int(10) unsigned NOT NULL default '0',
#   KEY `entry_id` (`entry_id`),
#   KEY `cat_id` (`cat_id`)
# ) ENGINE=MyISAM;



########################################################################
def encode(string):
	"""Return string with the XML special characters &, < and > escaped."""
	theEscaped = xml.sax.saxutils.escape(string)
	# A UTF-8 encoding step used to follow here and is intentionally left out:
	#	return xml.sax.saxutils.escape(string).encode('UTF-8')
	return theEscaped


def tag(tag, content):
	"""Wrap content in <tag>...</tag>; return '' when content is empty or None."""
	if content:
		return '<%s>%s</%s>' % (tag, content, tag)
	return ''

# Only the first weblog returned by the query is exported.
theWeblog = Weblog.select()[0]

# Data attribute holding the value of each custom field, keyed by field id.
theFieldAttributes = ((1, 'fieldId1'), (2, 'fieldId2'), (3, 'fieldId3'), (4, 'fieldId4'))

# Field names looked up so far (field id -> name).  Caching them means the
# fields table is queried at most once per field instead of once per entry;
# the lookup stays lazy so a field that never holds data is never queried.
theFieldNames = {}

theEntriesXML = ElementTree.Element('entries')
for theEntry in theWeblog.titles:
	print('# Importing record!')

	theEntryXML = ElementTree.SubElement(theEntriesXML, 'entry')
	print(theEntry.title)

	ElementTree.SubElement(theEntryXML, 'id').text = str(theEntry.id)
	ElementTree.SubElement(theEntryXML, 'title').text = theEntry.title
	ElementTree.SubElement(theEntryXML, 'slug').text = theEntry.slug
	ElementTree.SubElement(theEntryXML, 'entry_date').text = datetime.datetime.fromtimestamp(int(theEntry.entry_date)).isoformat()

	# An entry without a data row has no custom fields; skip them rather
	# than fail with an IndexError.
	theDataRows = theEntry.data
	if theDataRows:
		theData = theDataRows[0]
		for theFieldId, theAttribute in theFieldAttributes:
			theValue = getattr(theData, theAttribute)
			if theValue != '':
				if theFieldId not in theFieldNames:
					theFieldNames[theFieldId] = Field.select(Field.q.id == theFieldId)[0].name
				# The field's name becomes the XML element name.
				ElementTree.SubElement(theEntryXML, theFieldNames[theFieldId]).text = theValue

	# Categories are exported on a best-effort basis: a failure is reported
	# and the rest of the entry is still written.  Catching Exception rather
	# than everything keeps the handler from also covering SystemExit.
	try:
		theCategories = theEntry.categories
		if len(theCategories) > 0:
			theCategoriesXML = ElementTree.SubElement(theEntryXML, 'categories')
			for theCategory in theCategories:
				ElementTree.SubElement(theCategoriesXML, 'category').text = theCategory.name
	except Exception:
		print('# Problem with categories: %s' % sys.exc_info()[1])

	# Each access to a join attribute runs a query, so fetch the list once.
	theComments = theEntry.comments
	if len(theComments) > 0:
		theCommentsXML = ElementTree.SubElement(theEntryXML, 'comments')
		for theComment in theComments:
			theCommentXML = ElementTree.SubElement(theCommentsXML, 'comment')
			ElementTree.SubElement(theCommentXML, 'id').text = str(theComment.id)
			# Optional values are only written when they are not empty strings.
			if theComment.name != '':
				ElementTree.SubElement(theCommentXML, 'name').text = theComment.name
			if theComment.url != '':
				ElementTree.SubElement(theCommentXML, 'url').text = theComment.url
			if theComment.location != '':
				ElementTree.SubElement(theCommentXML, 'location').text = theComment.location
			if theComment.ip_address != '':
				ElementTree.SubElement(theCommentXML, 'ip_address').text = theComment.ip_address
			ElementTree.SubElement(theCommentXML, 'date').text = datetime.datetime.fromtimestamp(int(theComment.comment_date)).isoformat()
			# edit_date may be NULL in the database; omit the element then.
			if theComment.edit_date is not None:
				ElementTree.SubElement(theCommentXML, 'edit_date').text = theComment.edit_date.isoformat()

			if theComment.comment != '':
				ElementTree.SubElement(theCommentXML, 'comment').text = theComment.comment

	theTrackbacks = theEntry.trackbacks
	if len(theTrackbacks) > 0:
		theTrackbacksXML = ElementTree.SubElement(theEntryXML, 'trackbacks')
		for theTrackback in theTrackbacks:
			theTrackbackXML = ElementTree.SubElement(theTrackbacksXML, 'trackback')
			ElementTree.SubElement(theTrackbackXML, 'id').text = str(theTrackback.id)
			if theTrackback.content != '':
				ElementTree.SubElement(theTrackbackXML, 'content').text = theTrackback.content
			if theTrackback.weblog_name != '':
				ElementTree.SubElement(theTrackbackXML, 'weblog_name').text = theTrackback.weblog_name
			if theTrackback.trackback_url != '':
				ElementTree.SubElement(theTrackbackXML, 'url').text = theTrackback.trackback_url
			ElementTree.SubElement(theTrackbackXML, 'date').text = datetime.datetime.fromtimestamp(int(theTrackback.trackback_date)).isoformat()
			if theTrackback.trackback_ip != '':
				ElementTree.SubElement(theTrackbackXML, 'ip_address').text = theTrackback.trackback_ip


# NOTE: the output path is hard-coded; change it to a location that exists on
# your machine.  try/finally guarantees the file is flushed and closed even if
# serialising the tree fails.
theOutputFile = open('/Users/schwa/Desktop/Export.xml', 'w')
try:
	theOutputFile.write(ElementTree.tostring(theEntriesXML))
finally:
	theOutputFile.close()
#!/opt/local/bin/python

from sqlobject import *
from elementtree import ElementTree
import datetime
import time
import tzinfo

################################################################################

def delete(cls, *args, **kwargs):
	"""Destroy every row of cls, one destroySelf() call per selected row.

	Positional and keyword arguments are accepted but not used.
	"""
	theAllRows = cls.select()
	for theDoomedRow in theAllRows:
		theDoomedRow.destroySelf()
# Installed on the base class so every SQLObject subclass gets Class.delete().
SQLObject.delete = classmethod(delete)


def deleteBy(cls, *args, **kwargs):
	"""Destroy the rows of cls matched by selectBy(*args, **kwargs).

	The arguments are passed to cls.selectBy() unchanged; each matching row
	is removed with its own destroySelf() call.
	"""
	theMatchingRows = cls.selectBy(*args, **kwargs)
	for theDoomedRow in theMatchingRows:
		theDoomedRow.destroySelf()
# Installed on the base class so every SQLObject subclass gets Class.deleteBy().
SQLObject.deleteBy = classmethod(deleteBy)

################################################################################

def processTime(inTimeString):
	"""Parse a 'YYYY-MM-DDTHH:MM:SS' string into a datetime tagged with tzinfo.utc.

	inTimeString -- timestamp text in exactly the '%Y-%m-%dT%H:%M:%S' format.

	Returns a datetime.datetime whose tzinfo is tzinfo.utc and whose
	microsecond is 0.  time.strptime raises ValueError for text that does
	not match the format.
	"""
	# Only the first six struct_time fields (year .. second) are date/time
	# components.  The seventh is tm_wday; including it would pass the weekday
	# number to datetime() as the microsecond argument.
	theFields = time.strptime(inTimeString, '%Y-%m-%dT%H:%M:%S')[0:6]
	# Positional layout of datetime(): ..., second, microsecond, tzinfo.
	theDatetime = datetime.datetime(*(tuple(theFields) + (0, tzinfo.utc)))
	return theDatetime

################################################################################

import codecs

def MyErrorHandler(error):
	"""Codec error handler that drops the offending characters.

	error -- the Unicode error object handed over by the codec machinery.

	Returns (replacement, position): an empty replacement string and the
	index (error.end) at which processing should resume.
	"""
	theReplacement = u''
	theResumeIndex = error.end
	return (theReplacement, theResumeIndex)
# Make the handler available under the errors name 'MyErrorHandler'.
codecs.register_error('MyErrorHandler', MyErrorHandler)

def Purify(value):
	"""Return value encoded as UTF-8, or '' when value is None.

	value -- text to encode, or None.

	Characters that cannot be encoded are handled by the error handler
	registered under the name 'MyErrorHandler'.
	"""
	# Identity test: None is a singleton, and "is" cannot be affected by a
	# custom comparison method on value.
	if value is None:
		return ''
	return value.encode('utf8', 'MyErrorHandler')

################################################################################

### Read the URL from the config file
# This script's import block (sqlobject, elementtree, datetime, time, tzinfo)
# does not bring in ConfigParser, os or sys, all of which are needed here.
from ConfigParser import ConfigParser
import os
import sys

# Databases.cfg is expected to sit next to this script.  The script path is
# made absolute first: when the script is started as "python Script.py",
# dirname(sys.argv[0]) is '' and plain string concatenation would yield
# '/Databases.cfg' (the filesystem root).
theConfigPath = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'Databases.cfg')

theParser = ConfigParser()
# ConfigParser.read() skips files it cannot open and (Python 2.4+) returns the
# list of files it parsed, so an empty result means the config was not loaded.
if not theParser.read(theConfigPath):
	raise IOError('Could not read config file: %s' % theConfigPath)

# The 'export' option of the [Databases] section is the connection URI of the
# database this script writes to.
theURL = theParser.get('Databases', 'export')

connection = connectionForURI(theURL)

# Make this connection the default one for every SQLObject class below.
sqlhub.processConnection = connection

class Post(SQLObject):
	"""Row of the wp_posts table, keyed by ID.

	Every column except post_author has a default, so a post can be created
	from just the values the importer knows about.
	"""

	class sqlmeta:
		idName = 'ID'
		table = 'wp_posts'

	post_author = IntCol()

	# Creation times: local clock and UTC clock respectively.
	post_date = DateTimeCol(default=datetime.datetime.now)
	post_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)

	post_content = StringCol(default='')
	post_title = StringCol(default='')
	post_category = IntCol(default=0)
	post_excerpt = StringCol(default='')

	# Enumerated state columns.
	post_status = EnumCol(default='publish', enumValues=['publish','draft','private','static','object','attachment'])
	comment_status = EnumCol(default='open', enumValues=['open','closed','registered_only'])
	ping_status = EnumCol(default='open', enumValues=['open', 'closed'])

	post_password = StringCol(default='')
	post_name = StringCol(default='')
	to_ping = StringCol(default='')
	pinged = StringCol(default='')

	# Modification times: local clock and UTC clock respectively.
	post_modified = DateTimeCol(default=datetime.datetime.now)
	post_modified_gmt = DateTimeCol(default=datetime.datetime.utcnow)

	post_parent = IntCol(default=0)
	guid = StringCol(default='')
	menu_order = IntCol(default=0)
	post_type = StringCol(default='')
	post_mime_type = StringCol(default='')
	comment_count = IntCol(default=0)

	# Many-to-many link to Category through wp_post2cat.
	categories = RelatedJoin(
		'Category',
		intermediateTable='wp_post2cat',
		joinColumn='post_id',
		otherColumn='category_id',
	)
	# One-to-many link to Comment via its comment_post_ID column.
	comments = MultipleJoin('Comment', joinColumn='comment_post_ID')


class Category(SQLObject):
	"""Row of the wp_categories table, keyed by cat_ID."""
	class sqlmeta:
		table = 'wp_categories'
		idName = 'cat_ID'

	cat_name = StringCol(default = '')
	category_nicename = StringCol(default = '')
	category_description = StringCol(default = '')
	category_parent = IntCol(default = 0)
	category_count = IntCol(default = 0)

	# Many-to-many link back to Post through wp_post2cat, mirroring
	# Post.categories.  The target used to be 'Title', which is the entry
	# class of the ExpressionEngine export script and is not defined in this
	# import script; the class stored behind wp_post2cat.post_id is Post.
	entries = RelatedJoin('Post', intermediateTable='wp_post2cat', joinColumn='category_id', otherColumn='post_id')


class Comment(SQLObject):
	"""Row of the wp_comments table, keyed by comment_ID.

	The importer stores both regular comments and trackbacks in this table.
	"""

	class sqlmeta:
		idName = 'comment_ID'
		table = 'wp_comments'

	# Id of the post the comment belongs to (the join column of Post.comments).
	comment_post_ID = IntCol(dbName='comment_post_ID')

	comment_author = StringCol(default='')
	comment_author_email = StringCol(default='')
	comment_author_url = StringCol(default='')
	comment_author_IP = StringCol(dbName='comment_author_IP', default='')

	# Creation times: local clock and UTC clock respectively.
	comment_date = DateTimeCol(default=datetime.datetime.now)
	comment_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)

	comment_content = StringCol(default='')
	comment_karma = IntCol(default=0)
	comment_approved = EnumCol(default='1', enumValues=['0', '1', 'spam'])
	comment_agent = StringCol(default='')
	comment_type = StringCol(default='') ### 'pingback'
	comment_parent = IntCol(default=0)
	user_id = IntCol(default=0)

class User(SQLObject):
	"""Row of the wp_users table, keyed by ID.

	The importer only reads this table, to look up the author of the
	imported posts by user_login.
	"""
	class sqlmeta:
		table = 'wp_users'
		idName = 'ID'
	user_login = StringCol()
	user_pass = StringCol()
	user_nicename = StringCol()
	user_email = StringCol()
	user_url = StringCol()
	user_registered = DateTimeCol(default = datetime.datetime.now)
	user_activation_key = StringCol()
	# This used to read "IntCol" without the call parentheses, which binds the
	# column class itself to the attribute instead of declaring a column like
	# every other field of this class does.
	user_status = IntCol(default = 0)
	display_name = StringCol()

	
# WARNING: destructive.  Every existing post, category and comment row is
# destroyed (in that order) before anything is imported.
for theTable in (Post, Category, Comment):
	theTable.delete()

# Re-create the one category that has to exist after the wipe.
Category(cat_name = 'Uncategorised')

# All imported posts are attributed to this WordPress account.  The login
# name is hard-coded; the [0] raises IndexError when no such user exists.
theMatchingUsers = User.selectBy(user_login = 'schwa')
theUser = theMatchingUsers[0]
print(theUser.id)

def XMLImport(data, overwrite = True):
	"""Create WordPress posts, categories and comments from exported XML.

	data      -- XML text whose root element holds one child per entry.  Each
	             entry must have 'id', 'title', 'slug' and 'entry_date'
	             children; 'body', 'summary', 'extended', 'source',
	             'categories/category', 'comments/comment' and
	             'trackbacks/trackback' are optional.
	overwrite -- when true, an existing post with the same id is destroyed
	             before the new one is created.

	Returns None.  Rows are created through the Post, Category and Comment
	classes, and every post is attributed to the module-level theUser.
	"""
	theRootNode = ElementTree.fromstring(data)

	################################################################################
	theNodesToProcess = len(theRootNode)
#	theNodesToProcess = 2
	for theEntryNode in theRootNode[:theNodesToProcess]:

		theEntryId = int(theEntryNode.find('id').text)

		if overwrite:
			print('Deleting entry(%d)' % theEntryId)
			Post.deleteBy(id = theEntryId)

		theTitle = Purify(theEntryNode.find('title').text)
		print('Creating Entry(%s)' % theTitle)

		theSlug = theEntryNode.find('slug').text

		# The exported timestamp is parsed once as UTC and then converted to
		# tzinfo.Eastern for the local-time column.
		theEntryDateGMT = processTime(theEntryNode.find('entry_date').text)
		theEntryDate = theEntryDateGMT.astimezone(tzinfo.Eastern)
		print('%s' % theEntryDate)

		# findtext() returns None for a missing child and '' for a child
		# without text, so "absent" and "empty" can still be told apart.
		theBody = theEntryNode.findtext('body')
		theSummary = theEntryNode.findtext('summary')

		# An entry that only has a summary uses it as the body instead of as
		# the excerpt.
		if theBody is None and theSummary is not None:
			theBody = theSummary
			theSummary = None

		# Purify() maps None to '', so every piece below is a string and an
		# entry without body or summary no longer fails on None + str.
		theBody = Purify(theBody)
		theSummary = Purify(theSummary)
		theExtended = Purify(theEntryNode.findtext('extended'))
		theSource = Purify(theEntryNode.findtext('source'))

		theContent = theBody
		if theExtended:
			theContent = theContent + '<br/>' + theExtended
		if theSource:
			theContent = theContent + '<hr/><pre>%s</pre>' % theSource

		thePost = Post(id = theEntryId, post_name = theSlug, post_author = theUser.id, post_title = theTitle, post_content = theContent, post_date = theEntryDate, post_date_gmt = theEntryDateGMT, post_excerpt = theSummary)

		for theCategoryNode in theEntryNode.findall('categories/category'):
			theCategoryName = theCategoryNode.text
			# Look the category up by name only.  The old second lookup also
			# filtered on 'cat_nicename', which is not a column of Category
			# (the column is category_nicename) and which is left empty on the
			# categories created here, so it could never find them again.
			theMatches = Category.selectBy(cat_name = theCategoryName)
			if theMatches.count() == 0:
				theCategory = Category(cat_name = theCategoryName)
			else:
				theCategory = theMatches[0]
			theCategory.category_count = theCategory.category_count + 1

			thePost.addCategory(theCategory)

		for theCommentNode in theEntryNode.findall('comments/comment'):
			print(theCommentNode)

			# The exporter leaves out name, url, ip_address and comment when
			# they are empty, so every one of them defaults to ''.
			theAuthor = Purify(theCommentNode.findtext('name'))
			theUrl = theCommentNode.findtext('url', '')
			theIP = theCommentNode.findtext('ip_address', '')
			theDateGMT = processTime(theCommentNode.find('date').text)
			theDate = theDateGMT.astimezone(tzinfo.Eastern)
			theText = Purify(theCommentNode.findtext('comment'))

			theComment = Comment(comment_post_ID = thePost.id, comment_author = theAuthor, comment_author_IP = theIP, comment_author_url = theUrl, comment_date = theDate, comment_date_gmt = theDateGMT, comment_content = theText)
			thePost.comment_count = thePost.comment_count + 1

		# Shape of an exported trackback, for reference:
		# 			<trackback>
		# 			<id>4</id>
		# 			<content>Running something useful on Xgrid ...</content>
		# 			<weblog_name>: Simple.</weblog_name>
		# 			<url>http://unu.novajo.ca/simple/archives/000024.html</url>
		# 			<date>2004-01-15T18:05:04</date>
		# 			<ip_address>67.70.6.38</ip_address>
		# 			</trackback>

		for theTrackbackNode in theEntryNode.findall('trackbacks/trackback'):
			print(theTrackbackNode)

			# Trackbacks are stored as comments of type 'pingback'; the
			# originating weblog's name is used as the author.
			theAuthor = Purify(theTrackbackNode.findtext('weblog_name'))
			theUrl = theTrackbackNode.findtext('url', '')
			theIP = theTrackbackNode.findtext('ip_address', '')
			theDateGMT = processTime(theTrackbackNode.find('date').text)
			theDate = theDateGMT.astimezone(tzinfo.Eastern)

			theComment = Comment(comment_post_ID = thePost.id, comment_author = theAuthor, comment_author_IP = theIP, comment_author_url = theUrl, comment_date = theDate, comment_date_gmt = theDateGMT, comment_type = 'pingback')
			thePost.comment_count = thePost.comment_count + 1



#         see what others are running as well keep up the great
#         work</comment>


	
# NOTE: the input path is hard-coded; point it at the XML file written by the
# export script.  The file is read completely and closed before the import
# starts, so the handle is released even if the import fails.
theExportFile = open('/Volumes/Shared/Users/schwa/Desktop/export.xml')
try:
	theExportData = theExportFile.read()
finally:
	theExportFile.close()
XMLImport(theExportData)



.................................................
Melinda Roberts


_______________________________________________
Tutor maillist  -  Tutor@python.org
http://mail.python.org/mailman/listinfo/tutor

Reply via email to