Teaching basic lab skills
for research computing

Solution to Sets and Dictionaries Exercise

Last week, I posted an exercise on working with sets and dictionaries that also included a fair bit of file I/O and string manipulation. My solution is below, in four parts, with the code for each part (the first listing, which has no heading, is Part 1). If someone would like to re-do the file parsing using regular expressions, I'd be happy to post that as well.

import sys

#--------------------

def parse_pair(pair):
    '''
    Split one 'atom*count' term into an (atom, count) tuple.
    A term with no '*' separator is treated as a bare atom whose
    count defaults to 1.
    '''
    if '*' in pair:
        symbol, number = pair.split('*')
        return symbol, int(number)
    # No explicit count was given.
    return pair, 1

#--------------------

def parse_molecule(text):
    '''
    Turn one 'name: term.term...' line into a (name, formula)
    tuple, where formula is a dictionary mapping each atom to its
    count.  An atom that appears twice in the same formula trips
    an assertion.
    '''
    name, formula_text = text.split(':')
    formula = {}
    for term in formula_text.strip().split('.'):
        atom, count = parse_pair(term)
        assert atom not in formula, \
               'Already seen atom %s in text %s' % (atom, text)
        formula[atom] = count
    return name.strip(), formula

#--------------------

def read_molecules(reader):
    '''
    Build a {name : formula} dictionary from the lines yielded by
    reader.  Blank lines and lines starting with '#' are skipped;
    a molecule name that appears twice trips an assertion.
    '''
    molecules = {}
    for raw in reader:
        entry = raw.strip()
        if entry and not entry.startswith('#'):
            name, formula = parse_molecule(entry)
            assert name not in molecules, \
                   'Already seen %s!' % name
            molecules[name] = formula
    return molecules

#--------------------

if __name__ == '__main__':
    # Only parse standard input when this file is run as a script.
    # Without the guard, importing read_molecules from this file
    # (e.g. "from nano import read_molecules") would read and print
    # all of standard input at import time, leaving nothing for the
    # importing script to read.
    # A parenthesized single-argument print behaves the same under
    # Python 2 and Python 3.
    print(read_molecules(sys.stdin))

Part 2

def merge(left, right):
    '''
    Combine two dictionaries into a new one.  A key found in only
    one input is copied across; a key found in both is kept only
    if the two values are equal, and is left out otherwise.
    '''
    combined = {}

    # Keys from left: unique ones, plus shared ones that agree.
    for key, value in left.items():
        if key not in right or value == right[key]:
            combined[key] = value

    # Keys that appear only in right.
    for key, value in right.items():
        if key not in left:
            combined[key] = value

    return combined

Part 3

import sys
from nano import read_molecules

#--------------------

def can_produce(formulas, atom):
    '''
    Collect the names of every molecule in formulas whose formula
    includes the given atom, and return them as a set.
    '''
    return set(name for name in formulas if atom in formulas[name])

#--------------------

if __name__ == '__main__':
    # Check the command line before touching standard input, so a
    # missing atom argument produces a usage message immediately
    # rather than an IndexError after all the input has been read.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s atom' % sys.argv[0])
    atom = sys.argv[1]
    data = read_molecules(sys.stdin)
    # A parenthesized single-argument print behaves the same under
    # Python 2 and Python 3.
    print(can_produce(data, atom))

Part 4

import sys
from nano import read_molecules
from merge import merge
from produce import can_produce

#--------------------

def get_data(filename):
    '''
    Read molecules from each named file and merge the results into
    a single dictionary.  If no files are given, read molecules
    from standard input instead.

    filename is a sequence of file paths; the singular parameter
    name is kept so that existing callers continue to work.
    '''
    # Use the parameter itself, not a module-level 'filenames'
    # variable, so the function also works when imported.
    if not filename:
        return read_molecules(sys.stdin)

    data = {}
    for path in filename:
        # 'with' closes the file even if parsing fails part-way.
        with open(path, 'r') as reader:
            data = merge(data, read_molecules(reader))
    return data

#--------------------

if __name__ == '__main__':
    # Report bad usage with sys.exit rather than assert: asserts
    # are removed under 'python -O', after which a missing argument
    # would surface as an IndexError instead of a usage message.
    if len(sys.argv) < 2:
        sys.exit('Usage: final.py atom [files...]')
    atom_name = sys.argv[1]
    filenames = sys.argv[2:]
    data = get_data(filenames)
    # Print one molecule name per line, in sorted order.
    for m in sorted(can_produce(data, atom_name)):
        print(m)

Dialogue & Discussion

You can review our commenting policy here.