Pipeline / pipeline / 126dbf3ffc1

Commits

Rui Xue authored 126dbf3ffc1 Merge 06 Aug 2024

Pull request #1335: PIPE-2234/PIPE-2131: selfcal vla cube imaging updates

Merge in PIPE/pipeline from PIPE-2234-selfcal-vla-cube-imaging-updates to main

* commit '1d00b7337cb2e9a0d1b2b8ee496838bff69bd6b9': (33 commits)
  PIPE-1346: fix a minor QA regression bug (unwanted niter-limit warnings) introduced in 9eaf2381eea; decrease maxproductsize to 100 GB in test recipes.
  PIPE-2275: make the heuristics-skipping conditions mutually exclusive in checkproductsize.
  PIPE-2234: fix typos from the code review feedback.
  PIPE-2234: Refactor conditions for skipping CheckProductSize heuristics.
  PIPE-2242: use the PL2024 hifa_image recipe for the selfcal restoration test.
  PIPE-2260: suppress the dubious warning message when calling the VLA restfreq heuristics method with specmode='cont'.
  PIPE-2260: use cube center frequencies in LSRK/SOURCE as tclean rest frequency settings.
  PIPE-2234: improve the logging messages in a corner case (no suitable datatype).
  PIPE-2234: replace 'Contline applied' with 'Cont. applied' in the weblog; note that selfcal is applied to CONT_SCIENCE rather than CONTLINE_SCIENCE for VLA.
  PIPE-1346: do not use the cont-imaging uvrange heuristics for VLA cube imaging.
  PIPE-1346: use the imaging base-class imsize heuristics for VLA cube imaging.
  PIPE-1346: use robust=2.0 for VLA cube imaging.
  PIPE-1346: add a simple QA scoring rule for VLA cube imaging.
  PIPE-2258: avoid direct use of pb-mask in selfcal_helpers.estimate_near_field_SNR().
  PIPE-2181: decrease maxproductsize to 100 GB.
  PIPE-2275: remove the explicit setting of context.vla_skip_mfs_and_cube_imaging.
  PIPE-2260: explicitly use the spw center frequency as per-spw rest frequency for VLA cube imaging.
  PIPE-2255: avoid writing a blank cont.dat file if hif_findcont is bypassed in the VLA cube-imaging workflow.
  PIPE-2275: use the datatype registration info for "stage skipping" in the VLA cube imaging workflow.
  PIPE-2272: suppress the "Undefined representative bandwidth" warning when running hif_findcont on VLA data.
  ...

pipeline/domain/measurementset.py

Modified

-6
+import operator
-7
+import os
-8
+import re
-9
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
-10
-11
+import numpy as np
-12
-13
+import pipeline.infrastructure as infrastructure
-14
+import pipeline.infrastructure.utils as utils
-15
+from pipeline.infrastructure import casa_tools
--
+from pipeline.infrastructure.utils import conversion
-16
-17
+if TYPE_CHECKING:  # Avoid circular import. Used only for type annotation.
-18
+    from pipeline.infrastructure.tablereader import RetrieveByIndexContainer
-19
-20
+from pipeline.infrastructure import logging
--
+from pipeline.infrastructure.utils import range_to_list
-21
-22
+from . import measures, spectralwindow
-23
+from .antennaarray import AntennaArray
-24
+from .datatype import DataType
-25
-26
+LOG = infrastructure.get_logger(__name__)
-27
-28
-29
+class MeasurementSet(object):
-30
+    """
-250
+            fields_with_name = frozenset(self.get_fields(task_arg=field))
-251
+            pool = [s for s in pool if not fields_with_name.isdisjoint(s.fields)]
-252
-253
+        if spw is not None:
-254
+            if not isinstance(spw, collections.Sequence):
-255
+                spw = (spw,)
-256
+            if isinstance(spw, str):
-257
+                if spw in ('', '*'):
-258
+                    spw = ','.join(str(spw.id) for spw in self.spectral_windows)
-259
+                if '~' in spw:
--
+                    spw = conversion.range_to_list(spw)
-+
+                    spw = utils.range_to_list(spw)
-261
+                else:
-262
+                    spw = spw.split(',')
-263
+            spw = {int(i) for i in spw}
-264
+            pool = {scan for scan in pool for scan_spw in scan.spws if scan_spw.id in spw}
-265
+            pool = sorted(pool, key=lambda s: s.id)
-266
-267
+        return pool
-268
-269
+    def get_data_description(self, spw=None, id=None):
-270
+        match = None
-389
+            except:
-390
+                LOG.log(log_level, 'Could not translate spw name %s to ID. Trying frequency matching heuristics.' %
-391
+                        self.representative_window)
-392
-393
+        if target_spwid is not None:
-394
+            return (target_source_name, target_spwid)
-395
-396
+        # Get the representative bandwidth
-397
+        #     Return if there isn't one
-398
+        if not self.representative_target[2]:
--
+            LOG.warning('Undefined representative bandwidth for data set %s' % self.basename)
-+
+            if self.antenna_array.name not in ('VLA', 'EVLA'):
-+
+                LOG.warning('Undefined representative bandwidth for data set %s' % self.basename)
-401
+            return (target_source_name, None)
-+
-403
+        target_bw = cme.frequency('TOPO',
-404
+            qa.quantity(qa.getvalue(self.representative_target[2]),
-405
+            qa.getunit(self.representative_target[2])))
-406
-407
+        # Get the representative frequency
-408
+        #     Return if there isn't one
-409
+        if not self.representative_target[1]:
-410
+            LOG.warning('Undefined representative frequency for data set %s' % self.basename)
-411
+            return (target_source_name, None)
-412
+        target_frequency = cme.frequency('BARY',
-591
+        if not science_windows_only:
-592
+            return spws
-593
-594
+        if self.antenna_array.name == 'ALMA':
-595
+            science_intents = {'TARGET', 'PHASE', 'BANDPASS', 'AMPLITUDE',
-596
+                               'POLARIZATION', 'POLANGLE', 'POLLEAKAGE',
-597
+                               'CHECK', 'DIFFGAINREF', 'DIFFGAINSRC'}
-598
+            return [w for w in spws if w.num_channels not in self.exclude_num_chans
-599
+                    and not science_intents.isdisjoint(w.intents)]
-600
--
+        if self.antenna_array.name == 'VLA' or self.antenna_array.name == 'EVLA':
-+
+        if self.antenna_array.name in ('VLA', 'EVLA'):
-602
+            science_intents = {'TARGET', 'PHASE', 'BANDPASS', 'AMPLITUDE',
-603
+                               'POLARIZATION', 'POLANGLE', 'POLLEAKAGE',
-604
+                               'CHECK'}
-605
+            return [w for w in spws if w.num_channels not in self.exclude_num_chans
-606
+                    and not science_intents.isdisjoint(w.intents) and 'POINTING' not in w.intents]
-607
-608
+        if self.antenna_array.name == 'NRO':
-609
+            science_intents = {'TARGET'}
-610
+            return [w for w in spws if not science_intents.isdisjoint(w.intents)]
-611
-1359
+            colnames = table.colnames()
-1360
+        return colnames
-1361
-1362
+    def data_colnames(self):
-1363
+        """
-1364
+        Return all data column names for this MS.
-1365
+        """
-1366
+        return [colname for colname in self.all_colnames() if colname in ('DATA', 'FLOAT_DATA', 'CORRECTED_DATA')]
-1367
-1368
+    def set_data_column(self, dtype: DataType, column: str,
--
+                        source: Optional[str]=None,
--
+                        spw: Optional[str]=None,
--
+                        overwrite: bool=False):
-+
+                        source: Optional[str] = None,
-+
+                        spw: Optional[str] = None,
-+
+                        overwrite: bool = False):
-1372
+        """
-1373
+        Set data type and column.
-1374
-1375
+        Set data type and column to MS domain object and record the available
-1376
+        data types per (source,spw) tuple. If source or spw are unset, they
-1377
+        will be expanded to all available values.
-1378
-1379
+        Args:
-1380
+            dtype: data type to set
-1381
+            column: name of column in MS associated with the data type
-1393
+                column is already assigned to a type and would not
-1394
+                be overwritten.
-1395
+        """
-1396
+        # Check existence of the column
-1397
+        colnames = self.data_colnames()
-1398
+        if column not in colnames:
-1399
+            raise ValueError('Column {} does not exist in {}'.format(column, self.basename))
-1400
-1401
+        # Check if data type is already associated with another column
-1402
+        if not overwrite and dtype in self.data_column and self.get_data_column(dtype) != column:
--
+            raise ValueError('Data type {} is already associated with column {} in {}'.format(dtype, self.get_data_column(dtype), self.basename))
-+
+            raise ValueError('Data type {} is already associated with column {} in {}'.format(
-+
+                dtype, self.get_data_column(dtype), self.basename))
-1405
-1406
+        # Check if column is already assigned to another data type
-1407
+        if not overwrite and column in self.data_column.values() and self.get_data_column(dtype) != column:
--
+            raise ValueError('Column {} is already associated with data type {} in {}'.format(column, [k for k,v in self.data_column.items() if v == column][0], self.basename))
--
--
+        # Update data types per (source,spw) selection
--
+        if source is None:
--
+            source_names = ','.join(utils.dequote(s.name) for s in self.sources)
--
+        else:
--
+            # Check for empty or blank strings
--
+            if source.strip():
--
+                source_names = ','.join(utils.dequote(s.strip()) for s in source.split(','))
--
+            else:
--
+                source_names = ','.join(utils.dequote(s.name) for s in self.sources)
-+
+            raise ValueError('Column {} is already associated with data type {} in {}'.format(
-+
+                column, [k for k, v in self.data_column.items() if v == column][0], self.basename))
-1410
--
+        if spw is None:
--
+            spw_ids = ','.join(str(s.id) for s in self.spectral_windows)
--
+        else:
--
+            # Check for empty or blank strings
--
+            if spw.strip():
--
+                spw_ids = spw
--
+            else:
--
+                spw_ids = ','.join(str(s.id) for s in self.spectral_windows)
-+
+        source_name_list = self._source_select_to_list(source)
-+
+        spw_id_list = self._spw_select_to_list(spw)
-1413
--
+        for source_name in source_names.split(','):
--
+            for spw_id in map(int, range_to_list(spw_ids)):
-+
+        # Update data types per (source,spw) selection
-+
+        for source_name in source_name_list:
-+
+            for spw_id in spw_id_list:
-1417
+                key = (source_name, spw_id)
-1418
+                if key in self.data_types_per_source_and_spw:
-1419
+                    if dtype not in self.data_types_per_source_and_spw[key]:
-1420
+                        self.data_types_per_source_and_spw[key].append(dtype)
-1421
+                else:
-1422
+                    self.data_types_per_source_and_spw[key] = [dtype]
-1423
-1424
+        # Check for existing column registration and remove it
--
+        column_keys = [k for k,v in self.data_column.items() if v == column]
--
+        if column_keys!= []:
-+
+        column_keys = [k for k, v in self.data_column.items() if v == column]
-+
+        if column_keys != []:
-1427
+            for k in column_keys:
--
+                del(self.data_column[k])
-+
+                del (self.data_column[k])
-1429
-1430
+        # Update MS domain object
-1431
+        if dtype not in self.data_column:
-1432
+            self.data_column[dtype] = column
-1433
+            LOG.info('Updated data column information of {}. Set {} to column {}'.format(self.basename, dtype, column))
-1434
--
+    def get_data_column(self, dtype: DataType, source: Optional[str]=None, spw: Optional[str]=None) -> Optional[str]:
-+
+    def get_data_column(self, dtype: DataType, source: Optional[str] = None, spw: Optional[str] = None) -> Optional[str]:
-1436
+        """
-1437
+        Return the column name associated with a DataType in an MS domain object.
-1438
-1439
+        Args:
-1440
+            dtype: DataType to fetch column name for
--
+            source: Comma separated list of source names to filter for.
--
+            spw: Comma separated list of real spw IDs to filter for.
-+
+            source: source names (comma separated name selection string) to filter for.
-+
+                    If unset, all sources will be used.
-+
+            spw: spectral windows (comma separated real spw ID selection string) to filter for.
-+
+                 If unset, all real spw IDs will be used.
-1445
-1446
+            If source and spw are both unset, the method will just look
-1447
+            at the MS data type and column information. If one or both
-1448
+            parameters are set, it will require all (source,spw)
-1449
+            combinations to have data of the requested data type.
-1450
-1451
+        Returns:
-1452
+            A name of column of a dtype. Returns None if dtype is not defined
-1453
+            in the MS.
-1454
+        """
-1455
+        if dtype not in self.data_column.keys():
-1456
+            return None
-1457
-1458
+        if source is None and spw is None:
-1459
+            return self.data_column[dtype]
-1460
--
+        if source is None:
--
+            source_names = ','.join(utils.dequote(s.name) for s in self.sources)
--
+        else:
--
+            source_names = ','.join(utils.dequote(s.strip()) for s in source.split(','))
--
--
+        if spw is None:
--
+            spw_ids = ','.join(str(s.id) for s in self.spectral_windows)
--
+        else:
--
+            spw_ids = spw
-+
+        source_name_list = self._source_select_to_list(source)
-+
+        spw_id_list = self._spw_select_to_list(spw)
-1463
-1464
+        # Check all (source,spw) combinations
-1465
+        data_exists_for_all_source_spw_combinations = True
--
+        for source_name in source_names.split(','):
--
+            for spw_id in map(int, spw_ids.split(',')):
-+
+        for source_name in source_name_list:
-+
+            for spw_id in spw_id_list:
-1468
+                key = (source_name, spw_id)
-1469
+                if dtype not in self.data_types_per_source_and_spw.get(key, []):
-1470
+                    data_exists_for_all_source_spw_combinations = False
-1471
-1472
+        if data_exists_for_all_source_spw_combinations:
-1473
+            return self.data_column[dtype]
-1474
+        else:
-1475
+            return None
-1476
--
+    def get_data_type(self, column: str, source: Optional[str]=None, spw: Optional[str]=None) -> Optional[DataType]:
-+
+    def get_data_type(self, column: str, source: Optional[str] = None, spw: Optional[str] = None) -> Optional[DataType]:
-1478
+        """
-1479
+        Return the DataType associated with a column in an MS domain object.
-1480
-1481
+        Args:
-1482
+            column: name of column in MS
--
+            source: Comma separated list of source names to filter for.
--
+            spw: Comma separated list of real spw IDs to filter for.
-+
+            source: source names (comma separated name selection string) to filter for.
-+
+                    If unset, all sources will be used.
-+
+            spw: spectral windows (comma separated real spw ID selection string) to filter for.
-+
+                 If unset, all real spw IDs will be used.
-1487
-1488
+            If source and spw are both unset, the method will just look
-1489
+            at the MS data type and column information. If one or both
-1490
+            parameters are set, it will require all (source,spw)
-1491
+            combinations to have data of the requested data type.
-1492
-1493
+        Returns:
-1494
+            The DataType associated with the column name. Returns None
-1495
+            if dtype is not defined in the MS or in the source/spw
-1496
+            selection.
-1497
+        """
-1498
+        if column not in self.data_column.values():
-1499
+            return None
-1500
-1501
+        # Invert dictionary. This should not lead to wrong mappings
-1502
+        # because data types and columns have a 1:1 relation.
-1503
+        data_type = {v: k for k, v in self.data_column.items()}
-1504
-1505
+        if source is None and spw is None:
-1506
+            return data_type[column]
-1507
--
+        if source is None:
--
+            source_names = ','.join(utils.dequote(s.name) for s in self.sources)
--
+        else:
--
+            source_names = ','.join(utils.dequote(s.strip()) for s in source.split(','))
--
--
+        if spw is None:
--
+            spw_ids = ','.join(str(s.id) for s in self.spectral_windows)
--
+        else:
--
+            spw_ids = spw
-+
+        source_name_list = self._source_select_to_list(source)
-+
+        spw_id_list = self._spw_select_to_list(spw)
-1510
-1511
+        # Check all (source,spw) combinations
-1512
+        data_exists_for_all_source_spw_combinations = True
-1513
+        column_dtype = data_type[column]
--
+        for source_name in source_names.split(','):
--
+            for spw_id in map(int, spw_ids.split(',')):
-+
+        for source_name in source_name_list:
-+
+            for spw_id in spw_id_list:
-1516
+                key = (source_name, spw_id)
-1517
+                if column_dtype not in self.data_types_per_source_and_spw.get(key, []):
-1518
+                    data_exists_for_all_source_spw_combinations = False
-1519
-1520
+        if data_exists_for_all_source_spw_combinations:
-1521
+            return column_dtype
-1522
+        else:
-1523
+            return None
-+
-+
+    def _source_select_to_list(self, source_select: Union[str, None]) -> List[str]:
-+
+        """
-+
+        Convert a CASA-style source selection string to a list of source names.
-+
-+
+        Args:
-+
+            source_select: source string to convert
-+
-+
+        Returns:
-+
+            A list of source names (as strings)
-+
+        """
-+
-+
+        if source_select is None or not source_select.strip():
-+
+            # if None or empty or blank selection string, use all sources
-+
+            source_list = [utils.dequote(s.name) for s in self.sources]
-+
+        else:
-+
+            source_list = [utils.dequote(s.strip()) for s in source_select.split(',')]
-+
-+
+        return source_list
-+
-+
+    def _spw_select_to_list(self, spw_select: Union[str, None]) -> List[int]:
-+
+        """
-+
+        Convert a CASA-style spw selection string to a list of spw IDs.
-+
-+
+        Args:
-+
+            spw_select: spw selection string to convert
-+
-+
+        Returns:
-+
+            A list of spw IDs (as integers)
-+
+        """
-+
-+
+        if spw_select is None or not spw_select.strip():
-+
+            # if None or empty or blank selection string, use all spws
-+
+            spw_list = [s.id for s in self.spectral_windows]
-+
+        else:
-+
+            spw_list = utils.range_to_list(spw_select)
-+
-+
+        return spw_list

pipeline

Commits

Jira Issues

Add shortcut