古いバージョンのpythonとpostgreでDBのデータをxmlに変換して出力した

python2.4 とpostgreを使ってDBのバックアップを取るためにxmlとして出力する必要がありまして、その時のメモです。
方法は二つあって、DOMをメモリ上に全部もってからまとめて吐き出す方法と地道に一つずつ書き出していく方法があります。
小さなDBだったらまとめて吐き出せばいいと思うのですが、今回50万行以上のレコードがあったためそれだとメモリに乗らずに
スワップを使い切りそうでした。(実行してから教えてもらって気づいた・・・。)

以下、実例。

# -*- coding: utf-8 -*-
import datetime
import pgdb
import xml.dom.minidom, codecs
from xml.sax import saxutils


HOST = "localhost"
DATABASE = "sample"
USER = "ayahito"
PASSWORD = ""
FILE_NAME = "backup.xml"

STEP_NUM = 10000


def get_connecter(host, database, user, password):
    connecter = pgdb.connect(
        host=host,database=database,user=user,password=password)
    return connecter


def get_cursor(connecter):
    return connecter.cursor()


def get_count(connecter, tablename):
    cursor = get_cursor(connecter)
    cursor.execute("""
        SELECT COUNT(*) FROM %s;
        """ % (tablename))
    rows = cursor.fetchall()
    count = rows[0][0]
    return count


def get_table_names(connecter):
    cursor = get_cursor(connecter)
    cursor.execute("""SELECT relname AS table_name FROM
        pg_stat_user_tables;""")
    rows = cursor.fetchall()
    rows = [row[0] for row in rows]
    return rows


def get_table_records_and_column_names(connecter, table_name):
    cursor = get_cursor(connecter)
    cursor.execute("SELECT * from %s" % (table_name))
    col_names = [cn[0] for cn in cursor.description]
    col_rows = cursor.fetchall()
    return col_rows, col_names


def get_xml_doc():
    doc = xml.dom.minidom.Document()
    return doc


def create_root(doc):
    root = doc.createElement('root')
    doc.appendChild(root)
    return root

def create_child(doc, parent, elem_name):
    child = doc.createElement(elem_name)
    parent.appendChild(child)
    return child

def append_value_to_elem(doc, elem, value):
    if not isinstance(value, str):
        value = str(value)
    elem.appendChild(doc.createTextNode(value))


def write(doc, file_name):
    f1 = codecs.open(file_name, 'w', 'utf-8')
    doc.writexml(f1)
    f1.close()


def write_root(file_name):
    file_name.write("""<?xml version="1.0" ?><root>""")

def write_end_root(file_name):
    file_name.write("""</root>""")

def write_start_tag(file_name, name):
    file_name.write("<" + saxutils.escape(name) + ">")

def write_end_tag(file_name, name):
    file_name.write("</" + saxutils.escape(name) + ">")


def write_value(file_name, value):
    if (isinstance(value, int) or isinstance(value, float) or
        isinstance(value, long)):
        value = str(value)
    elif (isinstance(value, datetime.datetime) or
            isinstance(value, datetime.date)):
        value = "%s" % (value.isoformat())
    elif isinstance(value, str):
        value = value.decode('utf-8')
    else:
        value = str(value)
    file_name.write(saxutils.escape(value))


def step_execute(f, table_name, connecter):
    records = True
    last_used_id = 0
    while records:
        cursor = get_cursor(connecter)
        cursor.execute(
            "SELECT * from %s where id > %s order by id limit %s" % (
            table_name, last_used_id, STEP_NUM))
        col_names = [cn[0] for cn in cursor.description]
        records = cursor.fetchall()
        write_row(f, records, col_names)
        if records:
            last_record = records[-1]
            last_used_id = last_record[col_names.index("id")]


def write_row(f, records, col_names):
    for record in records:
        write_start_tag(f, "row")
        for col_name, value in zip(col_names, record):
            write_start_tag(f, col_name)
            write_value(f, value)
            write_end_tag(f, col_name)
        write_end_tag(f, "row")


def execute():
    doc = get_xml_doc()
    root = create_root(doc)
    connecter = get_connecter(HOST, DATABASE, USER, PASSWORD)
    table_names = get_table_names(connecter)
    for table_name in table_names:
        table_elem = create_child(doc, root, table_name)
        records, col_names = get_table_records_and_column_names(
            connecter, table_name)
        for record in records:
            row_elem = create_child(doc, table_elem, "row")
            for col_name, value in zip(col_names, record):
                colum_elem = create_child(doc, row_elem, col_name)
                append_value_to_elem(doc, colum_elem, value)
    write(doc, FILE_NAME)


def less_memory_execute():
    connecter = get_connecter(HOST, DATABASE, USER, PASSWORD)
    table_names = get_table_names(connecter)
    f = codecs.open("backup.xml","w","utf-8")
    write_root(f)
    for table_name in table_names:
        write_start_tag(f, table_name)
        if get_count(connecter, table_name) > STEP_NUM:
            step_execute(f, table_name, connecter)
        else:
            records, col_names = get_table_records_and_column_names(
                connecter, table_name)
            write_row(f, records, col_names)
        write_end_tag(f, table_name)
    write_end_root(f)


if __name__ == "__main__":
    less_memory_execute()

メモリに一度に乗らないように決められた行数分だけレコードを取得して
ファイルに書き出しています。定数使ったのでSTEP_NUM書き換えれば一度に取得する値も変えられます。