#!/usr/bin/python
#
# Script to convert iTunes xml database to rhythmbox xml database.
# Jacques Fortier (2004) <jfortier-rb@blergl.net>
#
# Usage: itunes2rhythm.py < /path/to/iTunes\ Music\ Library.xml > ~/.gnome2/rhythmbox/rhythmdb.xml
#
# Note: This script will overwrite any existing rhythmbox database, so use it with care!
#
# You'll definitely need to tweak the drivemapping dictionary to tell the script
# where your windows drives are mounted.
#
# You may also want to modify some of the defaults found below. These are only used
# when the given tag is missing from the iTunes db.

#
# Use drivemapping to convert from the Windows drive letters found in your
# itunes database to your linux mountpoints.
#
# If you don't mount a drive in linux and you want any files on that
# drive to be ignored instead of causing an error, set the mount point
# to None. For example: "localhost/C:" : None,
# Keys are "<host>/<drive letter>:" as they appear in iTunes file:// URLs;
# values are the matching mount point, or None to ignore that drive.
drivemapping = {
    "localhost/C:": None,
    "localhost/D:": None,
}

# Defaults to use when the tag isn't found in the iTunes db
DEFAULT_TITLE = "Unknown"
DEFAULT_GENRE = "Unknown"
DEFAULT_ALBUM = "Unknown"
DEFAULT_ARTIST = "Unknown"
DEFAULT_RATING = 0
DEFAULT_AUTORATE = 1
# You probably don't want to modify these two
DEFAULT_TRACKNUMBER = -1
# The conversion loop reads DEFAULT_MTIME; this constant used to be spelled
# DEFUALT_MTIME, which made every track without a modification date fail
# with a NameError.
DEFAULT_MTIME = 0
# Old misspelling kept as an alias so anything still referring to it works.
DEFUALT_MTIME = DEFAULT_MTIME


# The meat of the script follows

from time import mktime, strptime, localtime, timezone
from xml.dom.minidom import parse
from xml.sax.saxutils import escape, unescape
import xml.dom
import sys
import re

def convtime( strtime ):
    """Convert an iTunes UTC timestamp to seconds since the Unix epoch.

    strtime must have the form 'YYYY-MM-DDTHH:MM:SSZ', for example
    '2004-07-01T12:00:00Z'.  The result is returned as a float.
    strptime() raises ValueError if the string does not match that format.
    """
    # Imported locally because calendar is not among the file-level imports.
    from calendar import timegm
    # The timestamp is UTC, so it is converted with timegm(), which treats
    # the broken-down time as UTC.  Going through mktime() and subtracting
    # the fixed `timezone` offset is wrong by the DST shift whenever the
    # local zone observes daylight saving time at that instant.
    return float( timegm( strptime( strtime, '%Y-%m-%dT%H:%M:%SZ' ) ) )

def convlocation( location, mapping=None ):
    """Translate an iTunes Windows file URL into a URL under a local mount.

    location -- a URL of the form 'file://<host>/<DRIVE>:/<path>'.
    mapping  -- optional dictionary from '<host>/<DRIVE>:' to a mount point
                (or None to ignore that drive).  Defaults to the
                module-level drivemapping table.

    Returns 'file://' + mount point + path with one trailing slash removed,
    or None when the drive is mapped to None.  Exits the script through
    sys.exit() when the URL does not match the expected form or the drive
    is missing from the mapping.
    """
    if mapping is None:
        mapping = drivemapping
    match = re.match(r"file://(\w+/[A-Z]:)(/.+)", location)
    if not match:
        sys.exit( "Invalid location: " + location )
    drive = match.group(1)
    path = match.group(2)
    # "in" instead of dict.has_key(), which no longer exists in Python 3.
    if drive not in mapping:
        sys.exit("Unknown drive: %s" % drive)
    mapped_drive = mapping[drive]
    if mapped_drive is None:
        # The drive is deliberately ignored.
        return None
    if path.endswith('/'):
        path = path[:-1]
    return "file://" + mapped_drive + path
    


def getStringTag( keytag ):
    """Return the text of the <string> element that follows keytag.

    Non-element siblings (such as whitespace text nodes) between keytag and
    the next element are skipped.  Returns None when no element follows or
    when the next element is not a <string>.  An empty <string/> yields "".
    """
    sibling = keytag.nextSibling
    while sibling is not None and sibling.nodeType != xml.dom.Node.ELEMENT_NODE:
        sibling = sibling.nextSibling
    if sibling is None or sibling.tagName != 'string':
        return None
    text = sibling.firstChild
    if text is None:
        # An empty element has no text child to read .data from.
        return ""
    return text.data

def getIntegerTag( keytag ):
    """Return the value of the <integer> element that follows keytag.

    Non-element siblings (such as whitespace text nodes) between keytag and
    the next element are skipped.  Returns None when no element follows,
    when the next element is not an <integer>, or when that element is
    empty.  int() raises ValueError if the text is not a valid integer.
    """
    sibling = keytag.nextSibling
    while sibling is not None and sibling.nodeType != xml.dom.Node.ELEMENT_NODE:
        sibling = sibling.nextSibling
    if sibling is None or sibling.tagName != 'integer':
        return None
    text = sibling.firstChild
    if text is None:
        # An empty element carries no number; treat it like a missing tag.
        return None
    return int(text.data)

def getDateTag( keytag ):
    """Return the converted value of the <date> element that follows keytag.

    Non-element siblings (such as whitespace text nodes) between keytag and
    the next element are skipped.  Returns None when no element follows,
    when the next element is not a <date>, or when that element is empty.
    Otherwise the element's text is passed to convtime() and its result is
    returned.
    """
    sibling = keytag.nextSibling
    while sibling is not None and sibling.nodeType != xml.dom.Node.ELEMENT_NODE:
        sibling = sibling.nextSibling
    if sibling is None or sibling.tagName != 'date':
        return None
    text = sibling.firstChild
    if text is None:
        # An empty element carries no timestamp; treat it like a missing tag.
        return None
    return convtime(text.data)


# Read the complete iTunes library (an XML plist) from standard input.
itunesdb = parse( sys.stdin )

plist = itunesdb.documentElement

if plist.tagName != 'plist':
    sys.exit('Not a valid itunes db: No plist tag')

# Find the <key>Tracks</key> entry; the <dict> element directly after it
# holds one <dict> per track.
for keytag in plist.getElementsByTagName('key'):
    # An empty <key/> has no text child, so it cannot be the Tracks key.
    if keytag.firstChild is None or keytag.firstChild.data != 'Tracks':
        continue
    sibling = keytag.nextSibling
    while sibling:
        if sibling.nodeType == xml.dom.Node.ELEMENT_NODE:
            break
        sibling = sibling.nextSibling
    else:
        sys.exit("Not a valid itunes db: Tracks key not followed by an element")
    if sibling.tagName == 'dict':
        maindict = sibling
        break
else:
    sys.exit('Not a valid itunes db: Could not find tracks dict')

# Output is written as UTF-8 bytes.  A Python 3 text stream exposes its byte
# stream as .buffer; where that attribute is absent (Python 2) stdout itself
# accepts byte strings.
_out = getattr( sys.stdout, 'buffer', sys.stdout )

def _emit( line ):
    """Write one line of the rhythmbox database to stdout, UTF-8 encoded."""
    _out.write( ( line + '\n' ).encode('utf-8') )

_emit('<?xml version="1.0" standalone="yes"?>')
_emit('<rhythmdb version="1.0">')
for child in maindict.childNodes:
    # Every track is a <dict>; skip the <key> elements and whitespace nodes
    # that sit between them.
    if child.nodeType != xml.dom.Node.ELEMENT_NODE or child.tagName != 'dict':
        continue
    title = None
    artist = None
    album = None
    genre = None
    location = None
    tracknumber = None
    filesize = None
    duration = None
    rating = None
    lastplayed = None
    playcount = None
    mtime = None
    for keytag in child.childNodes:
        if keytag.nodeType != xml.dom.Node.ELEMENT_NODE or keytag.tagName != 'key':
            continue
        # Skip keys that are empty or whose first child is not plain text.
        if keytag.firstChild is None or keytag.firstChild.nodeType != xml.dom.Node.TEXT_NODE:
            continue
        keytype = keytag.firstChild.data
        if keytype == 'Name':
            title = getStringTag( keytag )
        elif keytype == 'Artist':
            artist = getStringTag( keytag )
        elif keytype == 'Album':
            album = getStringTag( keytag )
        elif keytype == 'Genre':
            genre = getStringTag( keytag )
        elif keytype == 'Location':
            # The location is copied through unchanged.  To remap Windows
            # drives through drivemapping, use
            # convlocation( getStringTag( keytag ) ) here instead.
            location = getStringTag( keytag )
        elif keytype == 'Track Number':
            tracknumber = getIntegerTag( keytag )
        elif keytype == 'Size':
            filesize = getIntegerTag( keytag )
        elif keytype == 'Total Time':
            # The value is divided by 1000 before being written as the
            # duration; floor division keeps it an integer.
            millis = getIntegerTag( keytag )
            if millis is not None:
                duration = millis // 1000
        elif keytype == 'Rating':
            # The iTunes rating is divided by 20.0 for the <rating> field.
            stars = getIntegerTag( keytag )
            if stars is not None:
                rating = stars / 20.0
        elif keytype == 'Date Modified':
            mtime = getDateTag( keytag )
        elif keytype == 'Play Date UTC':
            lastplayed = getDateTag( keytag )
        elif keytype == 'Play Count':
            playcount = getIntegerTag( keytag )
    # Tracks without a location, size or (non-zero) duration are dropped.
    if not location or not filesize or not duration:
        continue
    if not title:
        title = DEFAULT_TITLE
    if not artist:
        artist = DEFAULT_ARTIST
    if not album:
        album = DEFAULT_ALBUM
    if not genre:
        genre = DEFAULT_GENRE
    if not tracknumber:
        tracknumber = DEFAULT_TRACKNUMBER
    if not rating:
        rating = DEFAULT_RATING
    if not mtime:
        mtime = DEFAULT_MTIME
    # No iTunes key is mapped to auto-rate, so the default always applies.
    autorate = DEFAULT_AUTORATE
    _emit('  <entry type="song">')
    _emit('    <title>%s</title>' % escape(title))
    _emit('    <genre>%s</genre>' % escape(genre))
    _emit('    <artist>%s</artist>' % escape(artist))
    _emit('    <album>%s</album>' % escape(album))
    _emit('    <track-number>%d</track-number>' % tracknumber)
    _emit('    <duration>%d</duration>' % duration)
    _emit('    <file-size>%d</file-size>' % filesize)
    _emit('    <location>%s</location>' % escape(location))
    _emit('    <mtime>%d</mtime>' % mtime)
    _emit('    <rating>%0.6f</rating>' % rating)
    _emit('    <auto-rate>%d</auto-rate>' % autorate)
    # Play statistics are only written when both values are present.
    if lastplayed and playcount:
        _emit('    <play-count>%d</play-count>' % playcount)
        _emit('    <last-played>%d</last-played>' % lastplayed)
    _emit('    <mimetype></mimetype>')
    _emit('  </entry>')

_emit('</rhythmdb>')
