On Oct 5, 12:46 pm, Joseph Reagle <rea...@mit.edu> wrote: > I would think the commented code would be faster (fewer loops), but it is > not (because of function calls). > > #Average user_time = 5.9975 over 4 iterations > inSRC = set([bio.name for bio in bios.values()]) > inEB = set([bio.name for bio in bios.values() if bio.eb_title]) > inWP = set([bio.name for bio in bios.values() if bio.wp_title]) > inBoth = inEB & inWP > missingEB = inSRC - inEB > missingWP = inSRC - inWP > missingBoth = missingEB & missingWP > avg_size_ratio = find_average( > [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc]) > mdn_size_ratio = find_median( > [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc]) > SRCfem = set([bio.name for bio in bios.values() if bio.gender > == 'female']) > EBfem = set([bio.name for bio in bios.values() if bio.eb_gender > == 'female']) > WPfem = set([bio.name for bio in bios.values() if bio.wp_gender > == 'female']) > SRCmale = set([bio.name for bio in bios.values() if bio.gender > == 'male']) > EBmale = set([bio.name for bio in bios.values() if bio.eb_gender > == 'male']) > WPmale = set([bio.name for bio in bios.values() if bio.wp_gender > == 'male']) > SRCun = set([bio.name for bio in bios.values() if bio.gender > == 'unknown']) > EBun = set([bio.name for bio in bios.values() if bio.eb_gender > == 'unknown']) > WPun = set([bio.name for bio in bios.values() if bio.wp_gender > == 'unknown']) > > ##Average user_time = 6.0025 over 4 iterations > #def set_amend(obj, bio): > #if obj == None: > #obj = set([]) > #obj.add(bio.name) > #return obj > > #inSRC = set([]) > #inSRC = set([]) > #inEB = set([]) > #inWP = set([]) > #SRCfem = set([]) > #EBfem = set([]) > #WPfem = set([]) > #SRCmale = set([]) > #EBmale = set([]) > #WPmale = set([]) > #SRCun = set([]) > #EBun = set([]) > #WPun = set([]) > > #for bio in bios.values(): > ### use a function that takes set name (creates one) and conditional > #inSRC = set_amend(inSRC, bio) > #if 
bio.eb_title: inEB = set_amend(inEB, bio) > #if bio.wp_title: inWP = set_amend(inWP, bio) > #if bio.gender == 'female': SRCfem = set_amend(SRCfem, bio) > #if bio.eb_gender == 'female': EBfem = set_amend(EBfem, bio) > #if bio.wp_gender == 'female': WPfem = set_amend(WPfem,bio) > #if bio.gender == 'male': SRCmale = set_amend(SRCmale, bio) > #if bio.eb_gender == 'male': EBmale = set_amend(EBmale, bio) > #if bio.wp_gender == 'male': WPmale = set_amend(WPmale, bio) > #if bio.gender == 'unknown': SRCun = set_amend(SRCun, bio) > #if bio.eb_gender == 'unknown': EBun = set_amend(EBun, bio) > #if bio.wp_gender == 'unknown': WPun = set_amend(WPun, bio) > #inBoth = inEB & inWP > #missingEB = inSRC - inEB > #missingWP = inSRC - inWP > #missingBoth = missingEB & missingWP > #avg_size_ratio = find_average( > #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc]) > #mdn_size_ratio = find_median( > #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
Not only are you making many function calls, but you are also rebinding up to 12 names on every pass through the loop, even though set.add() already modifies the set in place. Why not create the empty sets once (as you already do) and then do this?

    for bio in bios.values():
        inSRC.add(bio.name)
        if bio.eb_title: inEB.add(bio.name)
        if bio.wp_title: inWP.add(bio.name)
        if bio.gender == 'female': SRCfem.add(bio.name)
        if bio.eb_gender == 'female': EBfem.add(bio.name)
        if bio.wp_gender == 'female': WPfem.add(bio.name)
        if bio.gender == 'male': SRCmale.add(bio.name)
        if bio.eb_gender == 'male': EBmale.add(bio.name)
        if bio.wp_gender == 'male': WPmale.add(bio.name)
        if bio.gender == 'unknown': SRCun.add(bio.name)
        if bio.eb_gender == 'unknown': EBun.add(bio.name)
        if bio.wp_gender == 'unknown': WPun.add(bio.name)

--
http://mail.python.org/mailman/listinfo/python-list