"""
Outputs a separate space- or tab-delimited text file for each chromosome.
Each line holds the marker name, position, and allele columns
(normally 2-4 alleles; a marker with one observed allele lists it twice).
Runs on mapped genotypic columns.

Author: Autumn Laughbaum, Golden Helix, Inc.
Last Updated: 2012-05-10
"""

# `ghi` is not imported anywhere in this file; presumably the host application
# injects it into the script's namespace before running it -- TODO confirm.
# Presumably declares the minimum scripting API version this script expects.
ghi.requireVersion('7.4')
# File name of this script; not referenced elsewhere in the visible source.
scriptName = 'Export Beagle Marker File By Chromosome.py'
   
import os
   
def beagleOut(dm,filePath,baseName,delim):
    """
    Write one marker file per chromosome for the columns of dm.

    Each output line is: the column header (spaces replaced by underscores),
    the map position, then the alleles found in that column, all joined by
    delim.  A new file is started through new_file() every time the
    chromosome of the current column differs from that of the previous
    column.  new_file() opens with mode 'w', so a chromosome that shows up
    again later overwrites the file written for it earlier.

    Parameters:
      dm       - data model providing markerMapChromosomes(),
                 markerMapPositions(), numCols(), colHeader(i),
                 genotypeAlleleCounts(i) and appendLog()
      filePath - directory the files are written to
      baseName - file name prefix; files are named <baseName>_chr<chr>.txt
      delim    - column separator written between the fields

    Returns None.  On completion a summary is appended to the log of dm and
    a message with the number of files is shown.  If the user cancels the
    progress dialog the function returns early without logging; whatever was
    written up to that point stays on disk.
    """
    m = 'Export beagle marker files to text format\n' + \
        ' *Filepath: ' + filePath + '\n' + \
        ' *Basename: ' + baseName + '\n' + \
        ' *Delimiter: '
    if delim == ' ':
        m += 'Space\n'
    elif delim == '\t':
        m += 'Tab\n'
    else:
        # Keep the log entry well-formed for any other separator.
        m += repr(delim) + '\n'
    m += '\nCreated the following files in ' + str(filePath) + ':\n'

    chrvect = dm.markerMapChromosomes()
    posvect = dm.markerMapPositions()
    N = len(chrvect)
    if N == 0:
        # Nothing to export; chrvect[0] below would raise IndexError.
        ghi.message('No marker mapped genotypic columns to export.')
        return

    progress = ghi.progressDialog("Writing marker files...", 100)
    progress.show()
    count = 0

    curChr = chrvect[0]
    newFiles = 1
    f,tempM = new_file(None,curChr,filePath,baseName)
    m += tempM

    # The try/finally guarantees the file that is currently open gets closed
    # (and therefore flushed) on normal completion, on cancel and on errors.
    try:
        for i in range(1,dm.numCols()+1):   # data model columns are 1-based
            chrom = chrvect[i-1]
            pos = str(posvect[i-1])
            if chrom != curChr:
                # Chromosome changed: new_file() closes the previous file
                # and opens the one for the new chromosome.
                f,tempM = new_file(f,chrom,filePath,baseName)
                curChr = chrom
                m += tempM
                newFiles += 1

            # Write genotypic data
            header = dm.colHeader(i).replace(' ','_')
            f.write(header + delim + pos + delim)
            alleleCounts = dm.genotypeAlleleCounts(i)
            # All alleles present; the last entry is the missing ('?')
            # count and is left out.
            alleleList = [alleleCounts[j][0] for j in range(len(alleleCounts)-1)]
            if len(alleleList) == 1:
                # Must be at least two allele columns
                alleleList.append(alleleList[0])
            else:
                alleleList.sort()
            f.write(delim.join(alleleList) + '\n')

            if progress.wasCanceled():
                return
            if 100.0*i/N > count:
                count = 100.0*i/N
                progress.setProgress(count)
    finally:
        f.close()

    progress.setProgress(100)
    progress.finish()

    dm.appendLog(m)
    ghi.message('Created ' + str(newFiles) + ' files in ' + filePath)


def new_file(f,chr,filePath,baseName):
    """
    Close f (if one is given) and open the output file for chromosome chr.

    The file is named <baseName>_chr<chr>.txt, lives in filePath and is
    opened in 'w' mode.

    Returns a tuple (open file object, log line naming the file).
    """
    # Finish the previous chromosome's file before starting the next one.
    if f:
        f.close()

    fileName = ''.join([baseName, '_chr', str(chr), '.txt'])
    handle = open(os.path.join(filePath, fileName), 'w')
    logLine = ' - ' + fileName + '\n'
    return handle, logLine


def check(ss):
    """
    Return the mapped genotypic data model of ss, or None if unusable.

    ss is asked for a data model filtered to mapped, genotypic columns.
    The spreadsheet is rejected, with a message to the user, when that
    request fails or when the marker map is not oriented along the columns.

    Returns the data model on success and None otherwise.
    """
    failMsg = "Spreadsheet must contain marker mapped genotypic columns."

    try:
        dm = ss.dataModel(ghi.const.FilterMapped|ghi.const.FilterGenotypic)
    except Exception:
        # Return right away: falling through would call a method on None
        # and raise AttributeError instead of reporting the problem once.
        ghi.message(failMsg)
        return None

    if dm is None or dm.markerMapOrientation() != ghi.const.MapOrientationColumns:
        ghi.message(failMsg)
        return None

    return dm
    
    
def prompt():
    """
    Ask the user for the export settings and run beagleOut().

    Works on the object returned by ghi.getCurrentObject().  Returns without
    doing anything when check() rejects that object or when the dialog
    yields a false value.
    """
    dm = check(ghi.getCurrentObject())
    if not dm:
        return

    # Default base name: the node name up to the first ' - ', with spaces
    # turned into underscores.
    nodeTitle = dm.nodeName().split(' - ')[0]
    defaultName = nodeTitle.replace(' ','_')

    dirField = {"label": "Choose output directory:",
                "type": "dir",
                "name": "dirPath"}
    nameField = {"label": "Specify the base file name:",
                 "type": "string",
                 "name": "baseName",
                 "default": defaultName}
    delimField = {"label": "Choose the file delimiter:",
                  "type": "combo",
                  "name": "delim",
                  "list": ["Whitespace Delimited", "Tab Delimited"]}

    answers = ghi.promptDialog([dirField, nameField, delimField],
                               title="Beagle Markers file by Chromosome files",
                               width=400)
    if not answers:
        return

    outDir = answers['dirPath']
    stem = answers['baseName']

    # Translate the combo box label into the actual separator character.
    choice = answers['delim']
    if choice == 'Whitespace Delimited':
        delim = ' '
    elif choice == 'Tab Delimited':
        delim = '\t'

    beagleOut(dm, outDir, stem, delim)


# Script entry point: run the export dialog as soon as the script is executed.
# Every exception raised below prompt() ends up in ghi.error(), which
# presumably reports the active exception to the user -- TODO confirm.
try:
    prompt()
except:
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit;
    # left as is because this is the outermost boundary of the script.
    ghi.error()