#!/usr/bin/env python

# TODO: Add du -sh output after the ms name if use_tb.

# Known bug: if called from outside a casapy session, tb.open(currms) will bomb
# if the given pathname for currms is too long.  "Too long" is not long enough.

from glob import glob
import locale
import os

def lsms(musthave=None, mspat="*[-_.][Mm][Ss]", combine='or', remind=True,
         sortfirst=False):
    """
    Summarize measurement sets matching certain criteria.

    Arguments:

    musthave:  A list of columns, subtables, or keywords that must be in the MS.
               If empty or None (default), the list of optional columns,
               subtables, and keywords of each MS will be printed.  Any entries
               will be internally uppercased.  A single string is also
               accepted; it is split on whitespace and/or commas.

    mspat:     A filename pattern, relative to the current directory, that the
               directory names of the MSes must match.
               Default: '*[-_.][Mm][Ss]'
               Tip: Try '**/*.ms' to find *.ms in . and all its subdirectories.

    combine :  Controls whether the conditions of musthave are combined with
               'or' (default) or 'and'.

    remind:    If True (default), print all columns and keywords of optional
               subtables, not just the optional ones.

    sortfirst: If sortfirst=True, print the matching MSes in alphabetical order.
               Otherwise, print each one as soon as it is found.

    Returns None; the summary is printed.

    Note that to fit in better with *sh behavior the argument order is reversed
    when calling from a non-python shell.  i.e. if you enter
      lsms \\*_MS source polarization
    in a *sh session, it will run with
      mspat='*_MS' and musthave=['SOURCE', 'POLARIZATION'].
    (remember to quote wildcards to avoid sh expansion)
    """
    if musthave is None:
        musthave = []
    elif isinstance(musthave, str):
        # Treat commas as separators so "a,b" and "a, b" both give two items.
        musthave = musthave.replace(',', ' ').split()

    listall = not musthave

    # Keywords keep doprint/freemem/remind from being mixed up: print (and
    # do not accumulate) as we go unless the output has to be sorted first.
    msdict, use_tb = matchingMSes(musthave, mspat, combine,
                                  doprint=not sortfirst,
                                  freemem=not sortfirst,
                                  remind=remind)

    if sortfirst:
        # Do a locale sensitive sort of mses - this and some other niceties were
        # cribbed from an implementation by Padraig Brady of ls in python at
        # http://www.pixelbeat.org/talks/python/ls.py.html
        try:
            locale.setlocale(locale.LC_ALL, '')
        except locale.Error:
            pass        # Unsupported locale settings: sort with the current one.

        for currms in sorted(msdict, key=locale.strxfrm):
            print_ms(currms, msdict[currms], listall, use_tb, remind)
        

def print_ms(currms, msdict, listall=False, use_tb=False, remind=True):
    """
    Prints the blurb in msdict, which is nominally about currms.

    currms:  Name of the MS; it heads the printed blurb.
    msdict:  Dict keyed by table name ('MAIN' or a subtable name).  When
             use_tb is True each value may hold 'cols' and 'kws' collections
             of that table's column and keyword names; missing ones are
             treated as empty.  msdict is not modified.
    listall: If False only currms is printed.  If True the optional items of
             each table are listed too.
    use_tb:  Whether msdict holds column and keyword information.  If not,
             only the subtables that are not required by the MS definition
             are listed.
    remind:  If True, also list the required columns and keywords of optional
             subtables.
    """
    currmsstr = ''
    if listall:                # List all its optional things
        notindefn = []
        for st in sorted(msdict):
            ststr = ''
            if use_tb:
                # Work on copies so that the caller's sets are left alone.
                cols = set(msdict[st].get('cols', []))
                kws = set(msdict[st].get('kws', []))
                if st in mstables['req']:
                    required = mstables['req'][st]['req']
                    ststr += _blurb_line('Optional column',
                                         cols.difference(required['cols']))
                    ststr += _blurb_line('Optional keyword',
                                         kws.difference(required['kws']))
                elif st in mstables['opt']:
                    required = mstables['opt'][st]['req']
                    if remind:
                        ststr += _blurb_line('Required column',
                                             required['cols'])
                    ststr += _blurb_line('Optional column',
                                         cols.difference(required['cols']))
                    if remind:
                        # Appended, so the column lines above are kept.
                        ststr += _blurb_line('Required keyword',
                                             required['kws'])
                    ststr += _blurb_line('Optional keyword',
                                         kws.difference(required['kws']))
                    if not ststr:
                        # Nothing to say about it, but note that it is there.
                        currmsstr += "  " + st + "\n"
                else:
                    notindefn.append(st)
            elif st not in mstables['req']:
                notindefn.append(st)

            if ststr:
                currmsstr += "  " + st + ":\n" + ststr

        if notindefn:          # Already sorted, since the loop was.
            if use_tb:
                currmsstr += "  Not in MS def'n V. 2.0: "
            currmsstr += ', '.join(notindefn) + "\n"

    if currmsstr:
        if use_tb:
            print(currms + ":\n" + currmsstr)
        else:
            print(currms + ": " + currmsstr.strip())
    else:
        print(currms)


def _blurb_line(label, names):
    """
    Returns an indented "label[s]: name, name" line for names, or '' if names
    is empty.
    """
    if not names:
        return ''
    return '    ' + label + string_from_list_or_set(names)


def string_from_list_or_set(li):
    """
    Returns ": " followed by the sorted entries of li joined by ", " and a
    newline.  An 's' is put in front if li has more than one entry, so that
    the result can be appended to a singular noun.  li is left unchanged.
    """
    plural = ''
    if len(li) > 1:
        plural = 's'
    return plural + ': ' + ', '.join(sorted(li)) + "\n"

# From MS def'n v. 2, http://aips2.nrao.edu/docs/notes/229/229.html
def _ms_table_defn(req_cols, opt_cols=(), req_kws=(), opt_kws=()):
    """
    Returns one entry of mstables: the required ('req') and optional ('opt')
    column ('cols') and keyword ('kws') names of a table, each as a new set.
    """
    return {'req': {'cols': set(req_cols), 'kws': set(req_kws)},
            'opt': {'cols': set(opt_cols), 'kws': set(opt_kws)}}


# mstables[group][table][need][kind] is a set of names, where group is 'req'
# or 'opt' (is the table itself required?), need is 'req' or 'opt', and kind
# is 'cols' or 'kws'.
mstables = {
    'req': {
        'MAIN': _ms_table_defn(
            req_cols=['TIME', 'ANTENNA1', 'ANTENNA2', 'FEED1', 'FEED2',
                      'DATA_DESC_ID', 'PROCESSOR_ID', 'FIELD_ID', 'INTERVAL',
                      'EXPOSURE', 'TIME_CENTROID', 'SCAN_NUMBER', 'ARRAY_ID',
                      'OBSERVATION_ID', 'STATE_ID', 'UVW', 'SIGMA', 'WEIGHT',
                      'FLAG', 'FLAG_CATEGORY', 'FLAG_ROW'],
            req_kws=['MS_VERSION'],
            opt_cols=['TIME_EXTRA_PREC', 'ANTENNA3', 'FEED3', 'PHASE_ID',
                      'PULSAR_BIN', 'PULSAR_GATE_ID', 'BASELINE_REF', 'UVW2',
                      'DATA', 'MODEL_DATA', 'CORRECTED_DATA', 'FLOAT_DATA',
                      'VIDEO_POINT', 'LAG_DATA', 'SIGMA_SPECTRUM',
                      'WEIGHT_SPECTRUM'],
            opt_kws=['SORT_COLUMNS', 'SORT_ORDER']),
        'ANTENNA': _ms_table_defn(
            req_cols=['NAME', 'STATION', 'TYPE', 'MOUNT', 'POSITION',
                      'OFFSET', 'DISH_DIAMETER', 'FLAG_ROW'],
            opt_cols=['ORBIT_ID', 'MEAN_ORBIT', 'PHASED_ARRAY_ID']),
        'DATA_DESCRIPTION': _ms_table_defn(
            req_cols=['SPECTRAL_WINDOW_ID', 'POLARIZATION_ID', 'FLAG_ROW'],
            opt_cols=['LAG_ID']),
        'FEED': _ms_table_defn(
            req_cols=['ANTENNA_ID', 'FEED_ID', 'SPECTRAL_WINDOW_ID', 'TIME',
                      'INTERVAL', 'NUM_RECEPTORS', 'BEAM_ID', 'BEAM_OFFSET',
                      'POLARIZATION_TYPE', 'POL_RESPONSE', 'POSITION',
                      'RECEPTOR_ANGLE'],
            opt_cols=['FOCUS_LENGTH', 'PHASED_FEED_ID']),
        'FIELD': _ms_table_defn(
            req_cols=['NAME', 'CODE', 'TIME', 'NUM_POLY', 'DELAY_DIR',
                      'PHASE_DIR', 'REFERENCE_DIR', 'SOURCE_ID', 'FLAG_ROW'],
            opt_cols=['EPHEMERIS_ID']),
        'FLAG_CMD': _ms_table_defn(
            req_cols=['TIME', 'INTERVAL', 'TYPE', 'REASON', 'LEVEL',
                      'SEVERITY', 'APPLIED', 'COMMAND']),
        'HISTORY': _ms_table_defn(
            req_cols=['TIME', 'OBSERVATION_ID', 'MESSAGE', 'PRIORITY',
                      'ORIGIN', 'OBJECT_ID', 'APPLICATION', 'CLI_COMMAND',
                      'APP_PARAMS']),
        'OBSERVATION': _ms_table_defn(
            req_cols=['TELESCOPE_NAME', 'TIME_RANGE', 'OBSERVER', 'LOG',
                      'SCHEDULE_TYPE', 'SCHEDULE', 'PROJECT', 'RELEASE_DATE',
                      'FLAG_ROW']),
        'POINTING': _ms_table_defn(
            req_cols=['ANTENNA_ID', 'TIME', 'INTERVAL', 'NAME', 'NUM_POLY',
                      'TIME_ORIGIN', 'DIRECTION', 'TARGET', 'TRACKING'],
            opt_cols=['POINTING_OFFSET', 'SOURCE_OFFSET', 'ENCODER',
                      'POINTING_MODEL_ID', 'ON_SOURCE', 'OVER_THE_TOP']),
        'POLARIZATION': _ms_table_defn(
            req_cols=['NUM_CORR', 'CORR_TYPE', 'CORR_PRODUCT', 'FLAG_ROW']),
        'PROCESSOR': _ms_table_defn(
            req_cols=['TYPE', 'SUB_TYPE', 'TYPE_ID', 'MODE_ID', 'FLAG_ROW'],
            opt_cols=['PASS_ID']),
        'SPECTRAL_WINDOW': _ms_table_defn(
            req_cols=['NUM_CHAN', 'NAME', 'REF_FREQUENCY', 'CHAN_FREQ',
                      'CHAN_WIDTH', 'MEAS_FREQ_REF', 'EFFECTIVE_BW',
                      'RESOLUTION', 'TOTAL_BANDWIDTH', 'NET_SIDEBAND',
                      'IF_CONV_CHAIN', 'FREQ_GROUP', 'FREQ_GROUP_NAME',
                      'FLAG_ROW'],
            opt_cols=['BBC_NO', 'BBC_SIDEBAND', 'RECEIVER_ID', 'DOPPLER_ID',
                      'ASSOC_SPW_ID', 'ASSOC_NATURE']),
        'STATE': _ms_table_defn(
            req_cols=['SIG', 'REF', 'CAL', 'LOAD', 'SUB_SCAN', 'OBS_MODE',
                      'FLAG_ROW']),
    },
    'opt': {
        'DOPPLER': _ms_table_defn(
            req_cols=['DOPPLER_ID', 'SOURCE_ID', 'TRANSITION_ID', 'VELDEF']),
        'FREQ_OFFSET': _ms_table_defn(
            req_cols=['ANTENNA1', 'ANTENNA2', 'FEED_ID',
                      'SPECTRAL_WINDOW_ID', 'TIME', 'INTERVAL', 'OFFSET']),
        'SOURCE': _ms_table_defn(
            req_cols=['SOURCE_ID', 'TIME', 'INTERVAL', 'SPECTRAL_WINDOW_ID',
                      'NUM_LINES', 'NAME', 'CALIBRATION_GROUP', 'CODE',
                      'DIRECTION', 'PROPER_MOTION'],
            opt_cols=['POSITION', 'TRANSITION', 'REST_FREQUENCY', 'SYSVEL',
                      'SOURCE_MODEL', 'PULSAR_ID']),
        'SYSCAL': _ms_table_defn(
            req_cols=['ANTENNA_ID', 'FEED_ID', 'SPECTRAL_WINDOW_ID', 'TIME',
                      'INTERVAL'],
            opt_cols=['PHASE_DIFF', 'TCAL', 'TRX', 'TSKY', 'TSYS', 'TANT',
                      'TANT_TSYS', 'TCAL_SPECTRUM', 'TRX_SPECTRUM',
                      'TSKY_SPECTRUM', 'TSYS_SPECTRUM', 'TANT_SPECTRUM',
                      'TANT_TSYS_SPECTRUM', 'PHASE_DIFF_FLAG', 'TCAL_FLAG',
                      'TRX_FLAG', 'TSKY_FLAG', 'TSYS_FLAG', 'TANT_FLAG',
                      'TANT_TSYS_FLAG']),
        'WEATHER': _ms_table_defn(
            req_cols=['ANTENNA_ID', 'TIME', 'INTERVAL'],
            opt_cols=['H2O', 'IONOS_ELECTRON', 'PRESSURE', 'REL_HUMIDITY',
                      'TEMPERATURE', 'DEW_POINT', 'WIND_DIRECTION',
                      'WIND_SPEED', 'H2O_FLAG', 'IONOS_ELECTRON_FLAG',
                      'PRESSURE_FLAG', 'REL_HUMIDITY_FLAG',
                      'TEMPERATURE_FLAG', 'DEW_POINT_FLAG',
                      'WIND_DIRECTION_FLAG', 'WIND_SPEED_FLAG']),
    },
}

# Every table name in mstables, required or optional (this includes 'MAIN').
possible_subtables = set(mstables['req']).union(mstables['opt'])


def find_needed_items(musthave=None, listall=False):
    """
    Given the set of "must have" items, fill out needed_subtables and needed_items,
    and determine whether or not to use tb.

    musthave: A set of uppercased criteria.  Each one is a table name from
              possible_subtables, a 'TABLE/ITEM' pair, or a bare column or
              keyword name that may be in any table.  None (default) means no
              criteria.  If criteria need the tb tool and it cannot be found,
              they are removed from musthave IN PLACE.
    listall:  If True the tb tool is looked for even if musthave does not
              need it.

    Returns (needed_subtables, needed_items, use_tb, mytb):
      needed_subtables: The set of table names in musthave, plus the TABLE of
                        each 'TABLE/ITEM' pair other than 'MAIN'.
      needed_items:     A dict of sets of item names, keyed by table name,
                        with the bare names under 'anywhere'.
      use_tb:           Whether a tb tool with a colnames attribute was found.
      mytb:             The tb tool, or None.
    """
    if musthave is None:
        # A new set per call; a shared default would be altered by the
        # in-place removal at the end.
        musthave = set([])

    needed_subtables = musthave.intersection(possible_subtables)
    needed_items = {'anywhere': set([])}  # cols and keywords
    for mh in musthave:
        mhparts = mh.split('/')
        if len(mhparts) > 1:
            needed_items.setdefault(mhparts[0], set([])).add(mhparts[1])
            if mhparts[0] != 'MAIN':
                needed_subtables.add(mhparts[0])
        elif mh not in possible_subtables:
            needed_items['anywhere'].add(mh)

    use_tb = False
    need_tb = musthave.difference(needed_subtables)
    mytb = None
    if need_tb or listall:
        try:
            # tb is only defined as a global if something else (nominally a
            # casapy session) put it there.
            mytb = tb
            use_tb = hasattr(mytb, 'colnames')
        except NameError:
            try:
                try:
                    # Only the name casac is used below.
                    from casac import casac
                except ImportError:
                    casapath = os.environ["CASAPATH"].split()
                    casacpath = glob(os.sep.join(casapath +
                                                 ['python', '2.*']))  # devs
                    casacpath.sort()
                    casacpath.reverse()
                    casacpath.extend(glob(os.sep.join([casapath[0],
                                                       'lib', 'python2.*'])))  # users
                    import sys
                    sys.path.extend(casacpath)
                    import casac
                mytb = casac.table()
                use_tb = hasattr(mytb, 'colnames')
            except Exception:
                # Best effort: carry on without tb.
                print("Could not find the tb tool.  Try running inside a casapy session or setting PYTHONPATH to /usr/lib/casapy/.../lib/python2.*.")
        if need_tb and not use_tb:
            print("Removing " + ', '.join(need_tb) + " from the criteria for matching.")
            musthave.difference_update(need_tb)

    return needed_subtables, needed_items, use_tb, mytb
    


def matchingMSes(musthave=None, mspat="*.ms", combine='or', doprint=False,
                 freemem=False, remind=True):
    """
    Returns a dict of MSes that match musthave and mspat as in
    lsms(musthave, mspat, combine, remind, sortfirst), and whether or not it
    found the tb tool.

    musthave: A list of criteria; they are uppercased internally.  A single
              string is split on whitespace and/or commas.  If empty or None
              (default) every MS matching mspat is kept and combine is
              ignored.
    mspat:    A filename pattern.  If it contains '**/', the part before the
              first '**/' (or '.' if that is empty) is searched recursively
              for directories matching the rest of the pattern.
    combine:  'and' requires all of musthave; anything else is treated as
              'or'.

    If doprint=True a blurb about each ms will be printed as it is found,
    using remind as in lsms().

    If freemem=True the return dict will NOT be updated.  Note that usually
    you want freemem == doprint.
    """
    if musthave is None:
        musthave = []
    elif isinstance(musthave, str):
        musthave = musthave.replace(',', ' ').split()

    holderdict = {'musthave': set([s.upper() for s in musthave]),
                  'mspat':    mspat,
                  'msdict':   {},
                  'use_and':  combine.lower() == 'and',
                  'use_tb':   None,
                  'listall':  False,
                  'doprint':  doprint,
                  'freemem':  freemem,   # checkMSes() looks for this key.
                  'remind':   remind}

    if not musthave:
        holderdict['listall'] = True
        holderdict['use_and'] = False

    nsit = find_needed_items(holderdict['musthave'], holderdict['listall'])
    holderdict['needed_subtables'] = nsit[0]
    holderdict['needed_items']     = nsit[1]
    holderdict['use_tb']           = nsit[2]
    holderdict['mytb']             = nsit[3]

    # Only split at the first '**/', so that nothing after it is dropped.
    splitatdoubleglob = mspat.split('**/', 1)
    if len(splitatdoubleglob) > 1:
        top = splitatdoubleglob[0] or '.'
        holderdict['mspat'] = splitatdoubleglob[1]
        for dirpath, dirnames, filenames in os.walk(top):
            checkMSes(holderdict, dirpath, dirnames + filenames)
    else:
        checkMSes(holderdict, '', [])

    return holderdict['msdict'], holderdict['use_tb']

        
def checkMSes(holderdict, dir, files):
    """
    Updates holderdict['msdict'] with the MSes in dir that match
    holderdict['mspat'] and the criteria in holderdict, as in
    lsms(musthave, mspat, combine, remind, sortfirst).

    holderdict: A dict of settings and results.  'mspat' is required.
                Optional entries: 'msdict', 'freemem', 'use_and', 'listall',
                'doprint', 'remind', 'musthave', 'use_tb', 'mytb' (the tb
                tool to use), and 'needed_subtables' and 'needed_items' as
                returned by find_needed_items().
    dir:        The directory to look in ('' for the current directory).
    files:      Ignored; it is only there so that this can be used as a
                directory walking callback.

    Each msdict entry is a dict keyed by 'MAIN' and the names of the
    uppercase-initialed entries inside the MS.  If the tb tool was used,
    the tables that were opened have their column names under 'cols' and
    their keyword names under 'kws'.

    With holderdict['use_and'] every criterion must be met: all of the
    needed subtables must be there, each TABLE/ITEM item must be in TABLE,
    and each 'anywhere' item must be in at least one table.  Otherwise one
    met criterion is enough.  A TABLE/ITEM criterion is only met by the mere
    presence of TABLE if the tb tool is not available to look for ITEM.

    If holderdict['doprint']=True a blurb about each ms will be printed as
    it is found, using holderdict['remind'] like remind in lsms().

    If holderdict['freemem']=True holderdict['msdict'] will NOT be updated.
    Note that usually you want holderdict['freemem'] == holderdict['doprint'].
    """
    mses = glob(os.path.join(dir, holderdict['mspat']))

    use_and = holderdict.get('use_and', False)
    listall = holderdict.get('listall', False)

    if holderdict.get('freemem'):
        retval = {}
    else:
        if not holderdict.get('msdict'):   # Initialize it so retval
            holderdict['msdict'] = {}      # can be tied to it.
        retval = holderdict['msdict']

    needed_subtables = holderdict.get('needed_subtables', set([]))
    needed_items = holderdict.get('needed_items', {})
    anywhere = set(needed_items.get('anywhere', []))
    main_needed = set(needed_items.get('MAIN', []))
    has_item_criteria = any(needed_items.values())

    tb = holderdict.get('mytb')
    use_tb = bool(holderdict.get('use_tb', False)) and tb is not None

    # The subtables whose presence alone meets a criterion in 'or' mode.
    # With tb, that is only the ones asked for by name, not the ones that
    # are just the TABLE part of a TABLE/ITEM criterion.
    present_ok = needed_subtables
    if use_tb:
        present_ok = needed_subtables.intersection(
            holderdict.get('musthave', needed_subtables))

    for currms in mses:
        if currms[:2] == './':  # strip off leading ./, if present.
            currms = currms[2:]    # cosmetic.

        # Forget anything already known about currms until it is kept.
        retval.pop(currms, None)

        subtabs = set([os.path.basename(s) for s in glob(currms + '/[A-Z]*')])
        msinfo = {'MAIN': {}}
        for st in subtabs:
            msinfo[st] = {}

        keep_currms = listall
        if needed_subtables:
            if use_and:
                keep_currms = needed_subtables.issubset(subtabs)
            elif present_ok.intersection(subtabs):
                keep_currms = True
        elif use_and and use_tb and has_item_criteria:
            # Only items were asked for, and none has been missed yet.
            keep_currms = True

        if use_tb and (keep_currms or listall or (not use_and)):
            unmet_anywhere = set(anywhere)
            subtabs_to_check = needed_subtables
            if listall or anywhere:
                subtabs_to_check = subtabs

            if listall or anywhere or main_needed:
                # Start with MAIN
                try:
                    cols, kws = _read_table(tb, currms)
                except Exception as e:
                    # Typically if we are here currms is too malformed for
                    # tb to handle, and e is usually "currms does not exist",
                    # which is usually incorrect.
                    if str(e).endswith(" does not exist"):
                        print("tb could not open " + currms)
                    else:
                        print("Error %s from tb.open( %s )" % (e, currms))
                    # Skip just this MS; the rest of mses may be fine.
                    continue

                msinfo['MAIN']['cols'] = cols
                msinfo['MAIN']['kws'] = kws.difference(possible_subtables)

                if not listall:
                    mainitems = set(cols)
                    mainitems.update(msinfo['MAIN']['kws'])
                    unmet_anywhere.difference_update(mainitems)
                    if use_and:
                        keep_currms = main_needed.issubset(mainitems)
                    elif not keep_currms:
                        if main_needed.union(anywhere).intersection(mainitems):
                            keep_currms = True

            # In 'and' mode a failure in MAIN is final.
            if keep_currms or not use_and:
                for st in subtabs_to_check:
                    stdir = currms + '/' + st
                    if os.path.isdir(stdir):
                        cols, kws = _read_table(tb, stdir)
                        stinfo = msinfo.setdefault(st, {})
                        stinfo['cols'] = cols
                        stinfo['kws'] = kws
                        if not listall:
                            stitems = set(cols)
                            stitems.update(kws)
                            unmet_anywhere.difference_update(stitems)
                            st_needed = set(needed_items.get(st, []))
                            if use_and:
                                if not st_needed.issubset(stitems):
                                    keep_currms = False
                                    break
                            elif not keep_currms:
                                if st_needed.union(anywhere).intersection(stitems):
                                    keep_currms = True
                    elif use_and and st in needed_subtables:
                        # There is something called st, but it is not a table.
                        keep_currms = False
                        break

            if use_and and not listall and unmet_anywhere:
                keep_currms = False

        if not keep_currms:
            continue

        retval[currms] = msinfo
        if holderdict.get('doprint'):
            print_ms(currms, msinfo, listall, use_tb,
                     holderdict.get('remind', True))


def _read_table(tb, path):
    """
    Returns the column names and the set of keyword names of the table at
    path, using the tb tool.  The table is closed again even on an error.
    """
    tb.open(path)
    try:
        return tb.colnames(), set(tb.keywordnames())
    finally:
        tb.close()


# following, sort of, from Python cookbook, #475186
def termprops(stream):
    """
    Return whether or not stream supports colors, and a guess at its number of
    columns (in characters).

    If stream is not a tty, or curses cannot be used, (False, 80) is returned.
    """
    have_colors = False
    termwidth   = 80
    if hasattr(stream, "isatty") and stream.isatty():
        try:
            import curses
            curses.setupterm()
            cols = curses.tigetnum('cols')
            # tigetnum() returns a negative number for a capability that is
            # missing or not numeric; keep the default width in that case.
            if cols > 0:
                termwidth = cols
            if curses.tigetnum("colors") > 2:
                have_colors = True
        except Exception:
            # Best effort: no usable curses means keeping the defaults.
            pass
    return have_colors, termwidth

# Even more things came from a similar ls in python by C. Blake,
# http://pdos.csail.mit.edu/~cblake/cls/cls.py
# (I did do this in perl as a postdoc.)

def rowscols(n, nc):
    """
    Return the number of rows needed to hold n entries in nc columns,
    i.e. ceil(n / nc), along with nc.
    """
    full_rows, leftover = divmod(n, nc)
    if leftover:
        full_rows += 1      # A partly filled row is still a row.
    return full_rows, nc

if __name__ == '__main__':
    import sys

    # Shell usage is "lsms [mspat [musthave ...]]", i.e. the pattern comes
    # first and anything after it is a must-have.
    cli_args = sys.argv[1:]
    mspat = '*.ms'
    if cli_args:
        mspat = cli_args[0]
    musthave = cli_args[1:]
    lsms(musthave, mspat)