#Copyright 2008 Guy Harrison # guy.a.harrison@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"), see: # http://www.apache.org/licenses/LICENSE-2.0 # usage=""" Utility to generate Google App Engine code to load data from an Oracle table Usage: GAEOraLoad.py applicationName username/password@TNS tableMask [rowLimit] applicationName Name of generated application in app.yaml username/password@TNS provides an Oracle connection string to the owner of the tables tableMask is either the name of a table or a wildcard to multiple table rowLimit Maximum number of rows to load Outputs: app.yaml file [table]Loader.py code which provides bulkloader code for a particular table [table].csv file which contains the CSV data from the table to be loaded Requires: cx_Oracle module: http://python.net/crew/atuining/cx_Oracle/ Example: GAEOraLoad.py myApp scott/tiger EMP creates EmpLoader.py which has the EmpLoad entry point(to load data) creates EmpQuery.py which has the EmpQry entry point (to display data) To Load the data (locally), you would issue the following command: bulkload_client.py --filename=Emp.csv --kind=Emp --url=http://localhost:8080/EmpLoad Known issues: Unicode characters may cause 500: Internal server error during load Floats cannot have missing values, they are set to -1 when the CSV is generated """ import cx_Oracle import sys # # Main routine # def main(argv): if (len(argv) < 4) : print "Usage: "+sys.argv[0]+" username/password@connection TableWildcard [rowLimit] " print usage sys.exit(2) try: appName=argv[1] connString=argv[2] tableMask=argv[3] if len(argv) >4: rowLimit=argv[4] else: rowLimit=-1 conn = cx_Oracle.connect (connString) cursor=conn.cursor() cursor.execute("alter session set sql_trace true") cursor.execute("alter session set tracefile_identifier=GAEOraLoad") tables=getTables(conn,tableMask) classFileName="loaderclasses.py" classfile=open(classFileName,'w') for tableName in tables: processTable(conn,tableName,classfile,rowLimit) # create files for each table genLoaderScript(tables) # A batch file to load everything classfile.close() print "Created %s with python definitions for all Oracle tables" % classFileName writeYaml(appName,tables) # The yaml file to control the resulting app except cx_Oracle.Error, e: print e[0] sys.exit(1) # # Create a batch file to load every table # should work in bash if you change "set" to "export" and %%variables to $variables # def genLoaderScript(tables): loaderBat=open("loadAll.bat",'w') loaderSh=open("loadAll.sh",'w') url="http://localhost:8080/" loaderBat.write("set GAE_HOME=c:\\tools\\google_appengine\n") loaderSh.write("export GAE_HOME=/usr/local/google_appengine\n") loaderBat.write("set PATH=%PATH%;%GAE_HOME%\\tools\n") loaderSh.write("export PATH=$PATH:${GAE_HOME}/tools\n") loaderBat.write("set URL=%s\n" % url ) loaderSh.write("export URL=%s\n" % url ) for table in tables: t=table.capitalize() cmd="python %%GAE_HOME%%\\tools\\bulkload_client.py --filename=%s.csv --kind=%s --url=%%URL%%%sLoad \n" \ % (t,t,t) loaderBat.write(cmd) cmd="python ${GAE_HOME}/tools/bulkload_client.py --filename=%s.csv --kind=%s --url=${URL}%sLoad \n" \ % (t,t,t) loaderSh.write(cmd) loaderBat.close() loaderSh.close() print "Created loadAll.bat with commands to load all tables on Windows" print "Created loadAll.sh with commands to load all tables on *nix" # # Return a list of tables matching the table mask provided # def getTables(conn,tableMask): sqlText="select table_name from user_tables where table_name like '%s'" cursor = conn.cursor() tables=[] cursor.execute(sqlText %tableMask) while (1): row=cursor.fetchone() if row == None: break tables.append(row[0]) return(tables) # # Generate a list of table names and attributes for a table # def getTableColumns(conn,tableName): cursor = conn.cursor() sqlText1="""SELECT LOWER (column_name) column_name, CASE data_type WHEN 'NUMBER' THEN 'Number' WHEN 'DATE' THEN 'Date' ELSE 'String' END data_type, decode(nullable,'N','Required=True','') nullable FROM user_tab_columns where table_name='%s'""" cursor.execute (sqlText1 % (tableName) ) tableColumns=[] while (1): row = cursor.fetchone () if row == None: break #print row[0] tableColumns.append({'name':row[0],'type':row[1],'null':row[2]}) return(tableColumns) # # Process a single table # def processTable(conn,tableName,classfile,rowLimit): # # Generate a list of columns # print "Processing table %s" % tableName tableColumns=getTableColumns(conn,tableName) # The definition of the table as a python class classText=classDefinition(tableName,tableColumns) classfile.write(classText) # The python code to load the Oracle table data into the BigTable version writeBulkLoader(tableName,tableColumns,classText) # The python code to extract the table data from BigTable writeQueryUrl(tableName,tableColumns,classText) # A CSV file containing the table data writeCsvFiles(conn,tableName,tableColumns,rowLimit) print "\tIssue the following command to load table %s" % tableName t=tableName.capitalize() print "\t\tbulkload_client.py --filename=%s.csv --kind=%s --url=http://localhost:8080/%sLoad " % (t,t,t) # # Write the python code to extract the table data from BigTable # def writeQueryUrl(tableName,tableColumns,classText): tabCap=tableName.capitalize() qryTxt="from google.appengine.ext import db\n" qryTxt+="import cgi\nimport wsgiref.handlers\n" qryTxt+="from google.appengine.api import users\nfrom google.appengine.ext import webapp\n\n" qryTxt+=classText qryTxt+="class MainPage(webapp.RequestHandler):\n\tdef get(self):\n" qryTxt+="\t\trows = %s.all().fetch(100)\n" % tabCap qryTxt+="\t\tprint \"\\n

First 100 rows for %s

\"\n" % tabCap for col in tableColumns: qryTxt+="\t\tprint \"\"\n" % col['name'] qryTxt+="\t\tprint \"\"\n" qryTxt+="\t\tfor row in rows:\n" qryTxt+="\t\t\tprint \"\"\n" for col in tableColumns: qryTxt+="\t\t\tprint \"\"\n" % col['name'] qryTxt+="\t\t\tprint \"\"\n" qryTxt+="\t\tprint \"
%s
\"+str(row.%s)+\"
\"\n\n" qryTxt+="def main():\n" qryTxt+="\tapplication = webapp.WSGIApplication([('/%sQry', MainPage),],debug=True)\n" % tabCap qryTxt+="\twsgiref.handlers.CGIHandler().run(application)\n\n" qryTxt+="if __name__ == \"__main__\":\n\tmain()\n" filename=tabCap+"Query.py" qryFile=open(filename,'w') qryFile.write(qryTxt) qryFile.close() print "\tCreated %s to query %s" % (filename,tableName) # # Dump out table data to a CSV file for loading into BigTable # def writeCsvFiles(conn,tableName,tableColumns,rowLimit): colExpr=[] for col in tableColumns: if col['type']=='String': colExpr.append("'\"'||nvl(replace(%s,'\"','\\\"'),' ')||'\"'" % col['name'] ) elif col['type']=='Date': colExpr.append("'\"'||nvl(to_char(%s,'YYYY-MM-DD:HH:MI:SS'),' ')||'\"'" % col['name'] ) else: colExpr.append("nvl(to_char(%s),'-1')" % col['name'] ) colList=','.join(colExpr) sqlText= "Select "+colList+" from %s " % tableName if rowLimit > 0 : sqlText+=" WHERE ROWNUM <= %s " % rowLimit fileName= tableName.capitalize()+".csv" csvfile=open(fileName,'w') cursor = conn.cursor() cursor.arraysize=1000 cursor.execute(sqlText) while (1): row = cursor.fetchone () if row == None: break csvfile.write(','.join(row)+"\n") csvfile.close() print "\tCreated %s with data from %s" % (fileName,tableName) # # Generate a python class definition corresponding to the Oracle table # def classDefinition(tableName,tableColumns): classDefStr="class %s(db.Model): \n" % tableName.capitalize() for column in tableColumns: colname=column['name'] if column['type']=='Number' : type='db.FloatProperty' elif column['type']=='Date' : type='db.DateTimeProperty' else: type='db.StringProperty' if column['null']=='Y': reqString='required=True' else: reqString='' classDefStr=classDefStr =classDefStr+"\t%s=%s(%s)\n" % (colname,type,reqString) return(classDefStr+"\n\n") # # Write the python code to load the CSV into BigTable # def writeBulkLoader(tableName,tableColumns,classText): tabCap=tableName.capitalize() filename=tabCap+"Loader.py" loaderClassFile=open(filename,'w') blText="from google.appengine.ext import db\n" blText+="from google.appengine.ext import bulkload\n" blText+="from google.appengine.api import datastore_types\n" blText+="from google.appengine.ext import search\n\n" blText+=classText+"\n" blText+="class %sLoader(bulkload.Loader):\n" % tabCap blText+="\tdef __init__(self):\n" # Our 'Person' entity contains a name string and an email blText+="\t\tbulkload.Loader.__init__(self, '%s',[\n" % tabCap for col in tableColumns: if col['type']=='Date': blText+="\t\t\t('%s', lambda x: datastore_types.datetime.datetime.strptime(x, '%%Y-%%m-%%d:%%H:%%M:%%S')),\n" % col['name'] elif col['type']=='Number': blText+="\t\t\t('%s', float),\n" % col['name'] else: blText+="\t\t\t('%s', str),\n" % col['name'] blText+="\t\t\t])\n" blText+="\tdef HandleEntity(self, entity):\n" blText+="\t\tent = search.SearchableEntity(entity)\n" blText+="\t\treturn ent\n\n" blText+="if __name__ == '__main__':\n" blText+="\tbulkload.main(%sLoader())\n" % tabCap loaderClassFile.write(blText) loaderClassFile.close() print "\tCreated %s to load %s" % (filename,tableName) # # Create the app.yaml file with all the neccessary entry points # def writeYaml(appName,tables): yamlFile=open("app.yaml",'w') yamlText="application: %s\nversion: 1\nruntime: python\napi_version: 1\nhandlers:\n" % appName for table in tables: t=table.capitalize() yamlText+="- url: /%sLoad\n script: %sLoader.py\n" % (t,t) yamlText+="- url: /%sQry\n script: %sQuery.py\n" % (t,t) yamlFile.write(yamlText) yamlFile.close() print "Created app.yaml for application %s" % appName main(sys.argv)