#!/usr/bin/python
#-*- coding: utf-8 -*-

import sys, os
import re
import optparse
import csv
from optparse import OptionParser
# The progress bar is an optional third-party dependency. When it cannot be
# imported, `progress` is False and the script prints atom names instead.
progress = True
try:
   from progressbar import ProgressBar
except ImportError:
   # Only a missing module/name disables the bar; anything else
   # (e.g. KeyboardInterrupt) is no longer swallowed.
   progress = False

# Column layout of the tables read from a geo file.
# Each entry is (output column name, index into the whitespace-split atom row).
# The first five lists belong to 'Table 1.' .. 'Table 5.'; the last two belong
# to the 'Coordinates' and 'Uij values' sections.
headers = [
   # Table 1 (note that row index 3 is not exported)
   [('Pval', 1), ('Kappa', 2), ("Kappa'", 4), ('Net charge', 5)],
   # Table 2
   [('D11+', 1), ('D11-', 2), ('D10', 3)],
   # Table 3
   [('Q20', 1), ('Q21+', 2), ('Q21-', 3), ('Q22+', 4), ('Q22-', 5)],
   # Table 4
   [
      ('O30', 1), ('O31+', 2), ('O31-', 3), ('O32+', 4),
      ('O32-', 5), ('O33+', 6), ('O33-', 7),
   ],
   # Table 5
   [
      ('H40', 1), ('H41+', 2), ('H41-', 3), ('H42+', 4), ('H42-', 5),
      ('H43+', 6), ('H43-', 7), ('H44+', 8), ('H44-', 9),
   ],
   # 'Coordinates' section
   [('X', 1), ('Y', 2), ('Z', 3), ('OZ', 4), ('ISO', 5)],
   # 'Uij values' section
   [('U11', 1), ('U22', 2), ('U33', 3), ('U12', 4), ('U13', 5), ('U23', 6)],
]
  
def output_csv(output, ofile):
   """Write the collected cycles to `ofile` as semicolon-separated CSV.

   output -- list of cycles; each cycle is a list of single-entry dicts
             mapping a column name to its value.
   ofile  -- writable file-like object.

   The header row uses the column names of the first cycle. Nothing is
   written when `output` is empty (previously this raised IndexError).
   """
   if not output:
      return

   csv_writer = csv.writer(ofile, dialect='excel', delimiter=';',
                           quoting=csv.QUOTE_MINIMAL, lineterminator='\n')

   # list(...) keeps this working where dict.keys()/values() are views.
   csv_writer.writerow([list(column.keys())[0] for column in output[0]])
   for cycle in output:
      csv_writer.writerow([list(column.values())[0] for column in cycle])
   

def output_standard(output, ofile):
   """Write the collected cycles to `ofile` as tab-terminated text columns.

   output -- list of cycles; each cycle is a list of single-entry dicts
             mapping a column name to its value.
   ofile  -- writable file-like object.

   The first line holds the column names of the first cycle; every cell is
   followed by a tab and every row by a newline. Nothing is written when
   `output` is empty (previously this raised IndexError).
   """
   if not output:
      return

   # Explicit writes emit the same bytes as the former
   # `print >> ofile, "%s\t" % x,` statements, without statement-only syntax.
   for column in output[0]:  # column names in the first line
      ofile.write("%s\t" % list(column.keys())[0])
   ofile.write("\n")

   for cycle in output:
      for column in cycle:
         ofile.write("%s\t" % list(column.values())[0])
      ofile.write("\n")

def find_line(lines, token):
   """Return the index of the first line containing `token`.

   Returns None when no line contains it.
   """
   position = 0
   for text in lines:
      if token in text:
         return position
      position += 1
   return None


def find_table(lines, num):
   """Return the index of the first row below the header of table `num`.

   The header is the first line containing 'Table <num>.'; the returned
   index is the line following the next line that starts with '---'.

   Raises ValueError when the header or the '---' separator is missing.
   Previously a missing header silently fell back to the first '---' in
   the file, and a missing separator ran past the end of `lines`.
   """
   tofind = 'Table %d.' % num

   head = None
   for i, line in enumerate(lines):
      if tofind in line:  # also matches a header starting in column 0
         head = i
         break
   if head is None:
      raise ValueError("Table %d not found" % num)

   for j in range(head, len(lines)):
      if lines[j].startswith('---'):
         return j + 1
   raise ValueError("No '---' separator found after the head of table %d" % num)

def find_atoms(geofile):
   """Return the names of all atoms listed in table 1 of `geofile`.

   A name is the first whitespace-delimited token of a row that starts
   with whitespace. Reading stops at the closing '---' line, at a blank
   line, or at the end of the file; rows the pattern does not match are
   skipped instead of raising AttributeError.

   If the file cannot be opened, an error message is printed and the
   process exits with status 1.
   """
   try:
      gfile = open(geofile, 'r')
   except IOError:
      sys.stdout.write("Error: No such file or directory: '%s'" % geofile + "\n")
      sys.exit(1)

   try:
      lines = gfile.readlines()
   finally:
      gfile.close()  # released even if reading fails

   start = find_table(lines, 1)

   regexp = re.compile(r'\s+([^\s]+)\s')
   atoms = []
   for line in lines[start:]:
      if line[:3] == '---' or line.strip() == '':  # end of the atom table
         break
      match = regexp.match(line)
      if match:
         atoms.append(match.group(1))
   return atoms

def find_atomline(lines, atom, start):
   """Search a table for `atom`, beginning at line index `start`.

   Returns the first line in which `atom` begins within the first three
   columns. Returns None when a line starting with '---' or a blank line
   (the end of the table) is reached first, when the end of `lines` is
   reached, or when `start` is None (section not found by the caller).
   """
   if start is None:
      return None

   # NOTE(review): this is a substring match, so a name that is a prefix of
   # another one (e.g. 'C1' vs 'C10') matches whichever row comes first.
   for i in range(start, len(lines)):
      line = lines[i]
      pos = line.find(atom)
      if 0 <= pos <= 2:
         return line
      if line.startswith('---') or line.strip() == '':  # the table ends here
         return None
   return None

def split_atomline(line):
   """Split an atom row into its columns.

   Sometimes a column with a negative value is so wide that the white
   space to the preceding column is missing, e.g. '0.12(3)-0.45(6)'.
   Such fused columns are separated at every ')-' that is not at the
   start of a column, so any number of fused values is handled
   (previously three or more raised ValueError).

   Returns the list of column strings.
   """
   # Put the missing blank back in front of the minus sign, then split.
   return re.sub(r'(?<=\S)\)-', ') -', line).split()
      

def _strip_deviation(value):
   """Return `value` cut off at its first '(' (the parenthesised deviation)."""
   pos = value.find('(')
   if pos >= 0:
      return value[:pos].rstrip()
   return value


def extract_features(lines, atom, cyclecount):
   """Collect all configured columns of `atom` from one geo file.

   lines      -- the lines of the geo file.
   atom       -- atom name to look up in every table.
   cyclecount -- number of the file; stored zero-padded as 'Nummer'.

   Returns a list of single-entry dicts {column name: value}, starting
   with 'Nummer'. Reads the module-level `headers` and `options`; the
   parenthesised deviation is kept only when `options.deviation` is set.
   A table or section that is missing, or that does not list the atom,
   contributes no columns.
   """
   cycle = [{'Nummer': "%03d" % cyclecount}]
   numbered = len(headers) - 2  # the last two header lists are sections

   # Numbered tables: 'Table 1.' .. 'Table <numbered>.'
   for tnum in range(numbered):
      try:
         start = find_table(lines, tnum + 1)
      except ValueError:  # table could not be located
         continue
      atom_line = find_atomline(lines, atom, start)
      if atom_line is None:  # atom not in this table
         continue

      cols = atom_line.split()
      for name, index in headers[tnum]:
         value = cols[index]
         if not options.deviation:
            value = _strip_deviation(value)
         cycle.append({name: value})

   # These sections are found by keyword and may contain fused columns.
   for offset, token in enumerate(('Coordinates', 'Uij values')):
      start = find_line(lines, token)
      if start is None:  # section missing; previously a TypeError
         continue
      atom_line = find_atomline(lines, atom, start)
      if atom_line is None:
         continue

      cols = split_atomline(atom_line)
      for name, index in headers[numbered + offset]:
         value = cols[index]
         if not options.deviation:
            value = _strip_deviation(value)
         cycle.append({name: value})

   return cycle

def handle_geofiles(atom, geofile_pattern):
   """Assemble the features of `atom` over all consecutive geo files.

   geofile_pattern -- format string with one integer placeholder; it is
                      filled with 1, 2, 3, ... until the resulting file
                      is not readable.

   Returns a list with one entry per file; each entry is the list of
   column dicts produced by extract_features, where the key of each dict
   is the name of the feature.
   """
   output = []
   cyclecount = 1

   while 1:  # loops as long as geo files are found
      fname = geofile_pattern % cyclecount
      if not os.access(fname, os.R_OK):  # check if geo file exists
         break

      geofile = open(fname, 'r')
      try:
         lines = tuple(geofile.readlines())
      finally:
         geofile.close()  # the handle used to be leaked for every cycle

      output.append(extract_features(lines, atom, cyclecount))
      cyclecount += 1
   return output
   

def handle_atom(atom, geofile_pattern):
   """Extract all cycles of `atom` and write them to its own output file.

   The file is '<atom>.csv' or '<atom>.txt' inside `options.outdir`,
   depending on the module-level `options.csvout`.
   """
   output = handle_geofiles(atom, geofile_pattern)

   if options.csvout:
      fname = '%s.csv' % atom
      writer = output_csv
   else:
      fname = '%s.txt' % atom
      writer = output_standard

   ofile = open(os.path.join(options.outdir, fname), 'w')
   try:
      writer(output, ofile)
   finally:
      ofile.close()  # closed even if writing fails

def extract_geofile_pattern(geofile):
   """Turn the name of the first geo file into a format pattern.

   The first zero-padded counter in the file name (one or more zeros
   followed by a digit) is replaced by a '%0<width>d' placeholder, e.g.
   'xd01_geo.out' -> 'xd%02d_geo.out'. Only the file name is inspected,
   so digits in a leading directory are left alone, and literal '%'
   characters are escaped.

   Raises ValueError when the file name has no zero-padded counter
   (previously this failed with a NameError).
   """
   basename = os.path.split(geofile)[1]
   directory = geofile[:len(geofile) - len(basename)]  # keeps the separator

   match = re.match(r"(.*?)(0+)(\d)(.*)", basename, re.DOTALL)
   if match is None:
      raise ValueError(
         "Cannot derive a file pattern from '%s': no zero-padded number found"
         % geofile)

   lead, zeros, _digit, tail = match.groups()
   placeholder = "%0" + str(len(zeros) + 1) + "d"
   return ((directory + lead).replace('%', '%%')
           + placeholder
           + tail.replace('%', '%%'))

# Script entry point. `options` must stay a module-level name because
# extract_features and handle_atom read it as a global.
if __name__ == '__main__':
   parser = OptionParser(version="%prog-03")
   parser.add_option("-s", "--std-dev", dest="deviation", action="store_true",
      help="Include standard deviation in output. Default: False", default=False)
   parser.add_option("-c", "--csv", dest="csvout", action="store_true",
      help="Enables output format CSV. Default: False", default=False)
   parser.add_option("-i", "--input", default="xd01_geo.out", dest="geofile",
      help="Name of the first input file. Default: %s" % "xd01_geo.out")
   parser.add_option("-o", "--outdir", default="pyout/", dest="outdir",
      help="Output directory. Default: pyout/")

   (options, args) = parser.parse_args()

   # An already existing output directory is fine; any other failure
   # (permissions, missing parent, ...) is reported instead of hidden.
   try:
      os.mkdir(options.outdir)
   except OSError:
      if not os.path.isdir(options.outdir):
         raise

   atoms = find_atoms(options.geofile)
   # The pattern depends only on the input name, so compute it once.
   pattern = extract_geofile_pattern(options.geofile)

   if progress:
      bar = ProgressBar(width=70)
   num_atom = len(atoms)
   for count, atom in enumerate(atoms):  # each atom is written to its own file
      if progress:
         bar((float(count + 1) / num_atom) * 100)
      else:
         # Space-separated atom names on one line, as before.
         if count:
            sys.stdout.write(" ")
         sys.stdout.write(atom)
         sys.stdout.flush()
      handle_atom(atom, pattern)
   sys.stdout.write("\n")


# vim:sts=3:ts=3:sw=3