import pybel

def fp_to_ascii( fp):
  """Encode a sequence of integers as a single hexadecimal string.

  Each value is rendered as lowercase hex, zero-padded to at least eight
  digits, and the pieces are concatenated in order without a separator.
  An empty sequence yields an empty string.
  """
  hex_words = []
  for word in fp:
    hex_words.append( "%08x" % word)
  return "".join( hex_words)
    
def ascii_to_fp( ascii, num_words=32):
  """Decode a hexadecimal string into a list of integers.

  The string is read in consecutive slices of eight hex digits, the
  layout produced when every value fits in eight digits.

  ascii     -- the hex text to decode
  num_words -- how many eight-digit words to read; the default of 32
               covers the first 256 characters of the string

  Returns a list of num_words integers. Characters beyond
  num_words * 8 are ignored. Raises ValueError when a slice is empty
  (string too short) or is not valid hexadecimal.
  """
  return [int( ascii[start:start+8], 16)
          for start in range( 0, num_words * 8, 8)]

def count_bits( num):
  """Return how many bits are set to 1 in the non-negative integer num.

  Raises ValueError for a negative num: shifting a negative integer to
  the right never reaches zero, so a shift-and-test loop would not
  terminate for such input.
  """
  if num < 0:
    raise ValueError( "count_bits() needs a non-negative integer, got %r" % (num,))
  # bin() yields e.g. '0b1011'; the '0b' prefix contains no '1' characters
  return bin( num).count( "1")


# We prepare a database-like file for later use: one line per structure,
# holding the SMILES string and its hex-encoded fingerprint separated by
# a single space.
# open() replaces the Python-2-only file() builtin, and the with-blocks
# close both files even if reading or fingerprinting a structure fails.
with open( 'structures.200.txt', 'r') as f:
  with open( "fingerprints.db", "w") as db_file:
    for line in f:
      if not line.strip():
        # blank lines have no third column to read
        continue
      # The third tab-separated column is taken as the SMILES string.
      # It is stripped so that a trailing newline (when it is the last
      # column) cannot split the output record across two lines.
      sm = line.split("\t")[2].strip()
      mol = pybel.readstring( "smi", sm)
      fp = mol.calcfp()
      db_file.write( "%s %s\n" % (sm, fp_to_ascii( fp.fp)))

# Now we do some similarity searching: the first record of the database
# is the query, and it is compared against the leading records of the
# same file (the query itself included).
# open() replaces the Python-2-only file() builtin, and the with-block
# closes the file even if a record fails to parse.
with open( "fingerprints.db", "r") as db_file:
  smiles1, finger1 = db_file.readline().split()
  fp1 = ascii_to_fp( finger1)
  # rewind so that the query record is scored as well
  db_file.seek( 0)
  for line_num, line in enumerate( db_file):
    smiles, finger = line.split()
    fp = ascii_to_fp( finger)
    # now we calculate tanimoto:
    # (bits set in both fingerprints) / (bits set in either fingerprint)
    common, total = 0, 0
    for num1, num in zip( fp1, fp):
      common += count_bits( num1 & num)
      total += count_bits( num1 | num)
    # When neither fingerprint has any bit set the ratio is undefined;
    # report 0.0 instead of failing with ZeroDivisionError.
    tanimoto = 1.0*common/total if total else 0.0
    # a single parenthesised argument prints the same on Python 2 and 3
    print( "%.2f  %03d  %s" % (tanimoto, line_num, smiles))
    # the check runs after printing, so records 0 through 21 are shown
    if line_num > 20:
      break
