Source code for importbets



import MySQLdb
import os, sys
import utils
import database

[docs]class importbets: """USED TO IMPORT OLD ARCHIVE DATA FROM THE OLD WETTERTURNIER. AS I WILL NEVER USE THIS ROUTINE AGAIN I DONT DO THE DOCUMENTATION FOR ALL THE METHODS IN HERE. Args: config (:obj:`dict`): Configuration dict from :meth:`utils.readconfig`. file_forced (:obj:`str`): File to force in. .. todo:: Migration script. Remove in the near future! """ data1 = None data2 = None cityID = None prefix = 'wp_' db_users = prefix + 'users' db_param = prefix + 'wetterturnier_param' db_bets = prefix + 'wetterturnier_bets' db_betstat = prefix + 'wetterturnier_betstat' db_cities = prefix + 'wetterturnier_cities' db_obs = prefix + 'wetterturnier_obs' unames = None # --------------------------------------------------------------- # - Init method # --------------------------------------------------------------- def __init__(self,config,file_forced): self.file_forced = file_forced self.config = config host = self.config['mysql_host'] user = self.config['mysql_user'] passwd = self.config['mysql_pass'] db = self.config['mysql_db'] print '* Establishing database connection' self.db = database.database(config) # --------------------------------------------------------------- # - Loading data from uri, then extract first and second block # Input can be uri (file=False!) or a file name (file=True!) # --------------------------------------------------------------- def loadfile(self,uri,file=False): from datetime import datetime as dt import urllib def getLines(code): return code.readlines() def findstring(lines,string,start=0): for i in range(start,len(lines)): if string in lines[i]: return i return None # ----------------------------------------------------------- # - Reading data from url # Find hash in lines # ----------------------------------------------------------- if not file: code = urllib.urlopen(uri) lines = getLines(code) else: fid = open(uri,'r') lines = fid.readlines() fid.close() self.raw_lines = lines # --------------------------------------------------------------- # - Simply import a file. # --------------------------------------------------------------- def loadfilecontent(self,file): import os import utils self._file = file if not os.path.isfile(file): utils.exit('In importbets.loadfilecontent error: cannot find file %s' % file) fid = open(file,'r'); self.raw_lines = fid.readlines(); fid.close() # --------------------------------------------------------------- # - Loading data from uri, then extract first and second block # Input can be uri (file=False!) or a file name (file=True!) # --------------------------------------------------------------- def takedata(self,lines,file=False): from datetime import datetime as dt def getLines(code): res = [] i = 0 while i < 400: res.append(str(code.readline())) i+=1 return res def findstring(lines,string,start=0): for i in range(start,len(lines)): if string in lines[i]: return i return None # - Rewrite lines self.raw_lines = lines # --------------------------------------------------------------- # - Tournament date # --------------------------------------------------------------- a = findstring(lines,'Wetterprognoseturnier') date = lines[a].strip()[-10:] # --------------------------------------------------------------- # - Getting data. Searching for Samstag:, Sonntag: and # the datum again which defines the blocks. # --------------------------------------------------------------- b1_from = findstring(lines,'Samstag:') b2_from = findstring(lines,'Sonntag:') b1_to = b2_from if not file: b2_to = findstring(lines,date,b2_from) else: b2_to = len(lines) # --------------------------------------------------------------- # - Searching for the block where the Abgabezeiten are set # --------------------------------------------------------------- t1_from = findstring(lines,'Abgabezeitpunkt:') t1_to = findstring(lines,'Auswertung Prognose') # --------------------------------------------------------------- # - Searching for the block where the Point sums are set # --------------------------------------------------------------- p_from = findstring(lines,'Punkte') p_to = findstring(lines,'durchschnittliche Punktzahl') if not p_to: p_to = findstring(lines,'aktuelle Jahreszeitenwertung') # --------------------------------------------------------------- # - Create timestamp # --------------------------------------------------------------- self.tdate = ( dt.strptime(date,'%d.%m.%Y') - \ dt.strptime('1970-01-01 00:00','%Y-%m-%d %H:%M') ).days # --------------------------------------------------------------- # - Info # --------------------------------------------------------------- print '* loadfile method says' print ' - %-20s %s' % ('Date from:',date) print ' - %-20s %3d %3d' % ('Block Saturday:',b1_from,b1_to) print ' - %-20s %3d %3d' % ('Block sunday:',b2_from,b2_to) if not p_from == None and not p_to == None: print ' - %-20s %d %d' % ('The points:',p_from,p_to) self.data1 = lines[b1_from:b1_to] self.data2 = lines[b2_from:b2_to] # - Observations do start at the same position as the bets # but we have to read just the next 5 lines. self.obs1 = lines[b1_from:(b1_from+5)] self.obs2 = lines[b2_from:(b2_from+5)] if p_from == None or p_to == None: self.points = None else: self.points = lines[p_from:p_to] # - The times where the bets have been placed if t1_from == None or t1_to == None: self.bettimes = None print 'Have not found start/end for bettimes! t1_from/t1_to wrong (None)' else: self.bettimes = lines[t1_from:t1_to] print ' - %-20s %d %d' % ('Block bettimes:',t1_from,t1_to) # --------------------------------------------------------------- # - Identify city and load city ID from database # --------------------------------------------------------------- def identify_city(self): from pywetterturnier import utils print "* Try to identify city based on the first few lines of the file" # - Only try first 5 lines. city_hash = False for line in self.raw_lines[0:5]: if not 'Wetterprognoseturnier' in line: continue if 'Innsbruck' in line: city_hash = 'IBK' elif 'Berlin' in line: city_hash = 'BER' elif 'Wien' in line: city_hash = 'VIE' elif 'Leipzig' in line: city_hash = 'LEI' elif 'richer' in line: city_hash = 'ZUR' elif 'rcher' in line: city_hash = 'ZUR' else: utils.exit('Problems identifying city in utils.identify_city in %s' % line) if not city_hash: print self.raw_lines[0:5] utils.exit('cannot identify city: in identify_city method') # Loading ID from database sql = "SELECT ID FROM " + self.db_cities + " WHERE " + \ "hash = \"" + city_hash + "\"" cur = self.db.cursor() cur.execute(sql) ID = cur.fetchone()[0] try: ID = int(ID) except: utils.exit('Problem converting the city ID into an integer value!') print ' - Identified city as [%s] with ID [%d]' % (city_hash, ID) self.cityID = ID # --------------------------------------------------------------- # - Extracting bettimes now # --------------------------------------------------------------- def extract_bettimes(self): import re import datetime as dt from pywetterturnier import utils data = self.bettimes if data == None: print '[!] WARNING bettimes not found. Data empty. Skip.' return # betdate is tdate = self.tdate print '* Extracting bettimes for date %d' % (tdate) # - Check if cityID is set if self.cityID == None: utils.exit('STOP. cityID equals None. Stop now') # - Caching usernames to extract the points afterwards. hopefully. unames = [] # - Going trough lines. # Stop if we find a user without user ID. res = [] found = False for line in data: if "_________________" in line: found = True continue if not found: continue # - Link to jump to the top. Ignore. if "ganz nach oben" in line: continue line = line.replace('\n','').strip() if len(line) == 0: continue # - Username and its nicename username = line.split()[0].strip() unames.append( username ) time = " ".join(line.split()[1:]) # - For Friday, no offset # Oh c'mon wetterturnier scripters. Fuck it. if 'xxxxx' in time: tdate_str = dt.datetime.fromtimestamp( 16332*86400 ).strftime('%Y-%m-%d') time = "%s %s" % (tdate_str,'16:00:00') else: if time[0:3] == 'UTC': print 'WRNOG TIME FORMAT, SKIP' continue if 'Fri' in time: tdate_offset = 0 elif 'Mon' in time: tdate_offset = -4 elif 'Tue' in time: tdate_offset = -3 elif 'Wed' in time: tdate_offset = -2 elif 'Thu' in time: tdate_offset = -1 elif 'Sat' in time: tdate_offset = 1 elif 'Sun' in time: tdate_offset = 2 else: print time print line.strip() print '[!] BET TIME FANCY DAY NAME IN DATE. CHEATED? :)' utils.exit('reto think about it') tdate_str = dt.datetime.fromtimestamp( (tdate+tdate_offset)*86400 ).strftime('%Y-%m-%d') time = "%s %s" % (tdate_str,time.split()[1]) #time = dt.datetime.strptime(time,'%Y-%m-%d %H:%M:%S').strftime('%s') nicename = utils.nicename( username, self.config['conversion_table'] ) # - Check if we have found a group if not self.db.get_group_id( nicename ) == False: nicename = 'GRP_%s' % nicename # - Now get username userID = self.db.get_user_id_and_create_if_necessary( nicename ) # - Problem detected. if not userID: utils.exit('Problems loading userID for %s' % nicename) print ' - [%3d] %-30s %-30s %s' % (userID,username,nicename,time) # - Append res.append( (time,userID,tdate,self.cityID) ) # - Insert into database print ' - Update database now' cur = self.db.cursor() sql = 'UPDATE '+self.db.prefix+'wetterturnier_bets SET placed = %s ' + \ ' WHERE userID = %s AND tdate = %s AND cityID = %s' cur.executemany( sql, res ) self.db.commit() sql = 'UPDATE '+self.db.prefix+'wetterturnier_betstat SET submitted = %s ' + \ ' WHERE userID = %s AND tdate = %s AND cityID = %s' cur.executemany( sql, res ) self.db.commit() # - Appending cached raw usernames to the object # This will be used to extract the points afterwards. self.unames = unames # --------------------------------------------------------------- # - Extracting user parameter points now. That is a test. # --------------------------------------------------------------- def extract_parameter_points(self): if self.unames == None: utils.exit('sorry. importbets.unames not set, cannot extract the points') print '\n * %s' % 'Extracting the points for the different parameters now' # - Cacing results res = [] for uname in self.unames: found = False day = None # - Convert raw username (in uname) into nicename nicename = utils.nicename( uname, self.config['conversion_table'] ) # - Check if we have found a group if not self.db.get_group_id( nicename ) == False: nicename = 'GRP_%s' % nicename # - Now get username userID = self.db.get_user_id_and_create_if_necessary( nicename ) # - Problem detected. if not userID: utils.exit('Problems loading userID for %s' % nicename) # - Extracting points for which user? print ' - Extracting parameter points for: %s, city %s' % (nicename,self.config['current_city']) # - Now scanning the lines to get the user points for line in self.raw_lines: # - Check nicename in line if 'Auswertung' in line: if uname == utils.nicename( line.split('Spieler')[1].replace(':','').strip(), self.config['conversion_table'] ) \ or uname.replace('GRP_','') == utils.nicename( line.split('Spieler')[1].replace(':','').strip(), self.config['conversion_table'] ): found = True print ' Found line %s' % line.replace('\n','').strip() continue else: continue elif not found: continue # - Found a day? if 'Samstag' in line: day = 1 continue elif 'Sonntag' in line: day = 2 continue elif len( line.strip() ) == 0: continue elif 'Wert' in line: continue elif '________' in line: continue # - Next Auwertung? JUMP! if 'unktzahl' in line: found = False continue # - Extracting the parameter words = line.split() paramID = self.db.get_parameter_id( words[0] ) try: points = float( words[-1] ) except: print words if self.file_forced: continue # Log as broken and continue bid = open(self.config['rawdir']+'/broken_files_identified.txt','a+') bid.write( "%s\n" % self._file ) bid.close() return False #utils.exit('Problems getting points from that line (%s)' % self._file) if not paramID: utils.exit('Problems extracting parameter or getting paramID from line') #print ' %5d %8.2f %s ' % (paramID,points,line.strip()) # - Results caching. Need # userID form self.db.get_user( uname ) # cityID from self.cityID # paramID from self.db.get_parameter_id( ... ) # tdate from self.tdate # betdate from self.tdate + day res.append( (points, userID,paramID, \ self.tdate,self.tdate + day) ) sql = 'UPDATE '+self.db.prefix+'wetterturnier_bets SET points = %s ' + \ ' WHERE userID = %s AND paramID = %s AND tdate = %s ' + \ ' AND betdate = %s AND cityID = ' + str(self.cityID) cur = self.db.cursor() cur.executemany( sql , res ) self.db.commit() # --------------------------------------------------------------- # - Extracting sum points for the users # --------------------------------------------------------------- def extract_sum_points(self): print '\n * %s' % 'Extracting the sum points for the players' import re # - Cacing results res = [] found = False for line in self.points: psum = None pd1 = None pd2 = None line = line.strip() if 'Name' in line and 'Punkte' in line: continue elif len(line) == 0: continue elif '____________' in line: found = True continue elif not found: continue tmp = line.split() print ' - Found rawname %s: ' % tmp[1] print ' ResT: %s ' % ' '.join(tmp[2:]) rawdata = ''.join(tmp[2:]) rawdata = rawdata.split(')')[0] data = filter(None,re.split( '[\(//\)]', rawdata)) if not len(data) == 3 and not len(data) == 1: utils.exit('Cannot split \'%s\' as expected' % rawdata) if len(data) == 3: try: psum = float(data[0]) pd1 = float(data[1]) pd2 = float(data[2]) except: utils.exit('Problems getting points as float from \'%s\'' % rawdata) else: try: psum = float(data[0]) pd1 = False pd2 = False except: utils.exit('Problems getting points as float from \'%s\'' % rawdata) # - Convert raw username (in uname) into nicename nicename = utils.nicename( tmp[1], self.config['conversion_table'] ) # - Check if we have found a group if not self.db.get_group_id( nicename ) == False: nicename = 'GRP_%s' % nicename # - Now get username userID = self.db.get_user_id_and_create_if_necessary( nicename ) # - Problem detected. if not userID: utils.exit('Problems loading userID for %s' % nicename) # - Extracting points for which user? print ' Extracting parameter points for: %s' % nicename print ' %7.2f %7.2f %7.2f' % (psum,pd1,pd2) if not pd1: res.append( (userID, self.cityID, self.tdate, psum) ) else: res.append( (userID, self.cityID, self.tdate, psum, pd1, pd2) ) if len(res[1]) == 6: #sql = 'INSERT INTO '+self.db.prefix+'wetterturnier_betstat ' + \ sql = 'INSERT INTO '+self.db_betstat+' ' + \ ' (userID, cityID, tdate, points, points_d1, points_d2) VALUES ' + \ ' (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE ' + \ ' points=VALUES(points), points_d1=VALUES(points_d1), ' + \ ' points_d2=VALUES(points_d2) ' else: sql = 'INSERT INTO '+self.db_betstat+' ' + \ ' (userID, cityID, tdate, points) VALUES ' + \ ' (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE ' + \ ' points=VALUES(points) ' cur = self.db.cursor() cur.executemany( sql , res ) self.db.commit() # --------------------------------------------------------------- # - Extracting obs now # --------------------------------------------------------------- def extract_obs(self,block): import re from pywetterturnier import utils if block == 1: data = self.obs1 elif block == 2: data = self.obs2 else: utils.exit('wrong input to extract_obs. 1 or 2 allowed') # betdate is tdate = self.tdate betdate = tdate + block print '* Extracting obs for block %d' % (block) # - Check if cityID is set if self.cityID == None: utils.exit('cityID equals None. Stop now') # - Loading parameters param = data[1].replace('WvWn','Wv Wn').split()[1:] # - First: find empty line. Those are followed by the # single bets until another empty line shows up or until # the end is reached.. found = False obscount = 0 print "\n".join(data) for line in data: line = line.replace('\n','').strip() if "_________________" in line: found = True continue if not found: continue # - Getting station name rawname = line.split()[0].strip() station_name = utils.nicename( rawname, self.config['conversion_table'] ) wmo = self.__get_wmo_number__(station_name) if not wmo: utils.exit('Cannot find wmo number for station ' + station_name + ' in config file') print ' - Found station ' + str(wmo) + ' ' + station_name # - Check if we do have enough values if not file: if not len(param) == len(line[26:].split()): utils.exit('parameter length and data length not matching in obs line') obs = line[26:].split() else: # parse that shit by hand obs = [] for i in range(len(param)): obs.append(" ") # now try to read whats here if len( line[len(rawname):] ) == 0: continue data = line[len(rawname):].split() if not len(param) == len(data): #utils.exit('noch nicht alle obs da, stop') print 'noch nicht alle obs da, skip' continue for i in range(len(param)): obs[i] = data[i] # - Else write data into database for i in range(0,len(param)): paramID = self.wp_get_param_id(param[i]) if paramID == None: utils.exit('found param which does not exist in db') if len(obs[i].strip()) == 0: continue #####print wmo,paramID,betdate,obs[i] self.__insert_obs_to_db__(wmo,paramID,betdate,obs[i]) # - Increase obscount. If 2: break obscount = obscount + 1 if obscount == 2: break; self.db.commit() # --------------------------------------------------------------- # - Insert bets into the database. # --------------------------------------------------------------- def __insert_obs_to_db__(self,station,paramID,bdate,value): from pywetterturnier import utils # - Skip if it was not observed. if value.strip() == 'n': return True # - Else convert if value == 'x': return try: value = int(round( float(value) * 10. )) except: utils.exit('could not convert value to float') sql = 'INSERT INTO ' + self.db_obs + ' (station, paramID, betdate, value) VALUES ' + \ '({0:d},{1:d},{2:d},{3:d}) ON DUPLICATE KEY UPDATE value = {3:d}' \ .format(int(station),int(paramID),int(bdate),value) print ' obs sql: %8d %3d %8d %f' % (int(station),int(paramID),int(bdate),value) cur = self.db.cursor(); cur.execute(sql) # --------------------------------------------------------------- # - Get wmo number from config. # --------------------------------------------------------------- def __get_wmo_number__(self,string): # - Lower string from config file string = string.lower() for key in self.config['stations'].keys(): if key == string.strip(): return self.config['stations'][key] # - Remove underline (LeipzigSchkeuditz is sometimes Leupzig_Schkeudiz for key in self.config['stations'].keys(): if key == string.strip().replace('_',''): return self.config['stations'][key] return False # --------------------------------------------------------------- # - Extracting bets now # --------------------------------------------------------------- def extract_bets(self,block): from pywetterturnier import utils import re if block == 1: data = self.data1 elif block == 2: data = self.data2 else: utils.exit('wrong input to extract_bets. 1 or 2 allowed') # betdate is tdate = self.tdate betdate = tdate + block print '* Extracting data from block %d' % (block) # - Check if cityID is set if self.cityID == None: utils.exit('STOP. cityID equals None. Stop now') # - Loading parameters param = data[1].replace('WvWn','Wv Wn').split()[1:] self.unames = [] # - First: find empty line. Those are followed by the # single bets until another empty line shows up or until # the end is reached.. found = False countdown = None for line in data: line = line.replace('\n','').strip() if "______________" in line: countdown = 3 continue elif not countdown == None: countdown = countdown - 1 if countdown == 0: countdown = None found = True continue elif not found: continue if "ganz nach oben" in line: continue elif len(line.strip()) == 0: continue orig = line.split()[0] username = utils.nicename( orig, self.config['conversion_table'] ) # - Check if we have found a group if not self.db.get_group_id( username ) == False: username = 'GRP_%s' % username # - Now get username userID = self.db.get_user_id_and_create_if_necessary( username ) # - Problem detected. if not userID: utils.exit('Problems loading userID for %s' % username) self.unames.append(username) if not len(param) == len(line[len(orig):].split()): utils.exit('parameter length and data length not matching') print ' - %-25s %d' % (username, userID) # - Else write data into database data = line[len(orig):].split() for i in range(0,len(param)): paramID = self.wp_get_param_id(param[i]) if paramID == None: utils.exit('found param which does not exist in db') #print userID,paramID,tdate,betdate,data[i] self.__insert_bet_to_db__(userID,paramID,tdate,betdate,data[i]) self.__insert_betstat_to_db__(userID,tdate) self.db.commit() # --------------------------------------------------------------- # - Insert bets into the database. # --------------------------------------------------------------- def __insert_bet_to_db__(self,userID,paramID,tdate,bdate,value): from pywetterturnier import utils if value == 'x': return try: value = int(round( float(value) * 10. )) except: utils.exit('could not convert value to float') sql = 'INSERT INTO ' + self.db_bets + ' (userID, cityID, paramID, ' + \ 'tdate, betdate, value) VALUES ' + \ '({0:d},{1:d},{2:d},{3:d},{4:d},{5:d}) ON DUPLICATE KEY UPDATE value = {5:d}' \ .format(int(userID),int(self.cityID),int(paramID),int(tdate),int(bdate),value) cur = self.db.cursor(); cur.execute(sql) # --------------------------------------------------------------- # - Insert betstat into the database. # --------------------------------------------------------------- def __insert_betstat_to_db__(self,userID,tdate): sql = 'INSERT INTO ' + self.db_betstat + ' (userID, cityID, tdate) VALUES ' + \ '({0:d},{1:d},{2:d}) ON DUPLICATE KEY UPDATE tdate = {2:d}' \ .format(int(userID),int(self.cityID),int(tdate)) cur = self.db.cursor(); cur.execute(sql) # --------------------------------------------------------------- # - Create user if not existing # --------------------------------------------------------------- ###def wp_create_user(self,username): ### check = self.wp_user_exists(username) ### if not check and self.allow_create_users: ### cur = self.db.cursor() ### sql = 'INSERT INTO ' + self.db_users + \ ### ' (user_login,user_nicename,display_name) ' + \ ### ' VALUES (\'{0:s}\', \'{0:s}\', \'{0:s}\') '.format(username) ### cur.execute(sql) ### elif not check: ### log.error("MIST") ### import sys; sys.exit(3) # --------------------------------------------------------------- # - Checks if user exists # --------------------------------------------------------------- def wp_user_exists(self,username): userID = self.db.get_user_id(username) if userID == None: return False else: return True # --------------------------------------------------------------- # - Returns user id if user exists # --------------------------------------------------------------- def wp_get_param_id(self,paramname): cur = self.db.cursor() cur.execute('SELECT paramID FROM ' + self.db_param + \ ' WHERE paramName = \"' + paramname + '\"') data = cur.fetchone() if data == None: return None else: return data[0] # --------------------------------------------------------------- def close(self): self.db.close()