Good Evening, I have a conundrum regarding JSON objects and converting them to CSV:
*Context* - I am converting XML files to a JSON object (please see snippet below) and then finally producing a CSV file. Here is a an example JSON object: "PAC": { "Account": [{ "PC": "0", "CMC": "0", "WC": "0", "DLA": "0", "CN": null, "FC": { "Int32": ["0", "0", "0", "0", "0"] }, "F": { "Description": null, "Code": "0" } In general, when I convert any of the files from JSON to CSV, I have been successful when using the following: import csv import json import sys def hook(obj): return obj def flatten(obj): for k, v in obj: if isinstance(v, list): yield from flatten(v) else: yield k, v if __name__ == "__main__": with open("somefileneame.json") as f: data = json.load(f, object_pairs_hook=hook) pairs = list(flatten(data)) writer = csv.writer(sys.stdout) header = writer.writerow([k for k, v in pairs]) row = writer.writerow([v for k, v in pairs]) #writer.writerows for any other iterable object However with the example JSON object (above) i receive the following error when applying this function: ValueError: too many values to unpack Here are some more samples. 1. "FC": {"Int32": ["0","0","0","0","0","0"]} 2. "PBA": {"Double": ["0","0","0","0","0","0","0","0"]} 3. "PBDD": { "DateTime": ["1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM", "1/1/0001 12:00:00 AM"] }, In the above example, I would like to remove the keys *Int32*, *Double *and *DateTime*. I am wondering if there is a function or methodology that would allow me to remove such nested keys and reassign the new keys to the outer key (in this case above *FC, PBA *and *PBDD*) as column headers in a CSV and concatenate all of the values within the list (as corresponding fields). Also, here is how I strategized my XML to CSV conversion (if this is of any use): import xml.etree.cElementTree as ElementTree from xml.etree.ElementTree import XMLParser import json import csv import tokenize import token try: from collections import OrderedDict import json except ImportError: from ordereddict import OrderedDict import simplejson as json import itertools import six import string from csvkit import CSVKitWriter class XmlListConfig(list): def __init__(self, aList): for element in aList: if element: # treat like dict if len(element) == 1 or element[0].tag != element[1].tag: self.append(XmlDictConfig(element)) # treat like list elif element[0].tag == element[1].tag: self.append(XmlListConfig(element)) elif element.text: text = element.text.strip() if text: self.append(text) class XmlDictConfig(dict): ''' Example usage: >>> tree = ElementTree.parse('your_file.xml') >>> root = tree.getroot() >>> xmldict = XmlDictConfig(root) Or, if you want to use an XML string: >>> root = ElementTree.XML(xml_string) >>> xmldict = XmlDictConfig(root) And then use xmldict for what it is..a dictionary. ''' def __init__(self, parent_element): if parent_element.items(): self.update(dict(parent_element.items())) for element in parent_element: if element: # treat like dict - we assume that if the first two tags # in a series are different, then they are all different. if len(element) == 1 or element[0].tag != element[1].tag: aDict = XmlDictConfig(element) # treat like list - we assume that if the first two tags # in a series are the same, then the rest are the same. else: # here, we put the list in dictionary; the key is the # tag name the list elements all share in common, and # the value is the list itself aDict = {element[0].tag: XmlListConfig(element)} # if the tag has attributes, add those to the dict if element.items(): aDict.update(dict(element.items())) self.update({element.tag: aDict}) # this assumes that if you've got an attribute in a tag, # you won't be having any text. This may or may not be a # good idea -- time will tell. It works for the way we are # currently doing XML configuration files... elif element.items(): self.update({element.tag: dict(element.items())}) # finally, if there are no child tags and no attributes, extract # the text else: self.update({element.tag: element.text}) def main(): #Lines 88-89stantiate the class Elementree #and applies the method to recursively traverse from the root node #XmlDictConfig is instantiated in line 90 with open('C:\\Users\\wynsa2\\Desktop\\Python Folder\\PCSU\\Trial2_PCSU\\2-Response.xml', 'r', encoding='utf-8') as f: xml_string = f.read() xml_string= xml_string.replace('�', '') root = ElementTree.XML(xml_string) xmldict = XmlDictConfig(root) json_str = json.dumps(xmldict, sort_keys=True, indent=4, separators=(',', ': ')) newly_formatted_data = json.loads(json_str) #encode into JSON with open('data2.json', 'w') as f: #writing JSON file json.dump(newly_formatted_data, f) I hope that I was clear in my description. Thank you all for your help. Sincerely, Saran _______________________________________________ Tutor maillist - Tutor@python.org To unsubscribe or change subscription options: https://mail.python.org/mailman/listinfo/tutor