afscgap.inference

Tools for inferring missing, negative, or zero catch records.

This file is part of afscgap released under the BSD 3-Clause License. See LICENSE.md.

View Source

   1"""
   2Tools for inferring missing, negative, or zero catch records.
   3
   4(c) 2023 Regents of University of California / The Eric and Wendy Schmidt Center
   5for Data Science and the Environment at UC Berkeley.
   6
   7This file is part of afscgap released under the BSD 3-Clause License. See
   8LICENSE.md.
   9"""
  10import copy
  11import csv
  12import io
  13import itertools
  14import queue
  15import typing
  16
  17import afscgap.convert
  18import afscgap.client
  19import afscgap.cursor
  20import afscgap.http_util
  21import afscgap.model
  22import afscgap.query_util
  23
  24from afscgap.typesdef import OPT_FLOAT
  25from afscgap.typesdef import OPT_INT
  26from afscgap.typesdef import OPT_REQUESTOR
  27from afscgap.typesdef import OPT_STR
  28
# URL from which get_hauls_data downloads the hauls CSV when the caller does
# not provide a hauls_url of their own.
DEFAULT_HAULS_URL = 'https://pyafscgap.org/community/hauls.csv'
# Species metadata indexed by name. NegativeInferenceCursorDecorator keys this
# mapping by scientific name.
SPECIES_DICT = typing.Dict[str, afscgap.model.SpeciesRecord]

# Collection of haul metadata and its optional variant (None meaning that the
# hauls have not been provided and must be requested).
HAUL_LIST = typing.List[afscgap.model.Haul]
OPT_HAUL_LIST = typing.Optional[HAUL_LIST]
# Names of the query fields which can be evaluated against a Haul. Query
# parameters whose name is not listed here are ignored by build_params_checker
# when filtering hauls.
HAUL_FILTERABLE_FIELDS = [
    'year',
    'srvy',
    'survey',
    'survey_id',
    'cruise',
    'haul',
    'stratum',
    'station',
    'vessel_name',
    'vessel_id',
    'date_time',
    'latitude_dd',
    'longitude_dd',
    'bottom_temperature_c',
    'surface_temperature_c',
    'depth_m',
    'distance_fished_km',
    'net_width_m',
    'net_height_m',
    'area_swept_ha',
    'duration_hr'
]

# Signature of a predicate which takes a Haul and returns True if that haul
# satisfies the query (should be kept) or False if it should be excluded.
PARAMS_CHECKER = typing.Callable[[afscgap.model.Haul], bool]
  59
  60
  61def build_inference_cursor(params: dict, inner_cursor: afscgap.cursor.Cursor,
  62    requestor: OPT_REQUESTOR = None, hauls_url: afscgap.client.OPT_STR = None,
  63    hauls_prefetch: OPT_HAUL_LIST = None):
  64    """Build a cursor which infers zero catch records.
  65
  66    Args:
  67        params: Dictionary of filters to apply to the query where a value of
  68            None means no filter should be applied on that field.
  69        inner_cursor: Cursor which yields records which, when appearing, should
  70            not be later inferred as zero catch records.
  71        requestor: Strategy to make HTTP GET requests. If None, will default
  72            to requests.get.
  73        hauls_url: The URL at which the Hauls file can be found or None to use
  74            a default. Defaults to None.
  75        hauls_prefetch: List of hauls data to use. If None, will request from
  76            hauls_url. If not None, will use this instead.
  77
  78    Returns:
  79        Cursor which 1) first iterates over the inner_cursor and then
  80        2) provides inferred zero catch records (for any hauls without observed
  81        data from inner_cursor for a species).
  82    """
  83    params_safe = copy.deepcopy(params)
  84
  85    if 'date_time' in params_safe:
  86        params_safe['date_time'] = afscgap.convert.convert_from_iso8601(
  87            params_safe['date_time']
  88        )
  89
  90    if hauls_prefetch is not None:
  91        hauls_data = hauls_prefetch
  92    else:
  93        hauls_data = get_hauls_data(
  94            params_safe,
  95            requestor=requestor,
  96            hauls_url=hauls_url
  97        )
  98
  99    return NegativeInferenceCursorDecorator(inner_cursor, hauls_data)
 100
 101
 102def build_params_checker(params: dict) -> PARAMS_CHECKER:
 103    """Build a function that checks if a single Haul record should be filtered.
 104
 105    Args:
 106        params: Dictionary of filters to apply to the query where a value of
 107            None means no filter should be applied on that field.
 108
 109    Returns:
 110        Function which returns true if the record given to it should be included
 111        (is not filtered out) or false if it should be excluded from results to
 112        meet a query requirement (is filtered out).
 113    """
 114
 115    def build_query_function(key: str, checker):
 116        return lambda target: checker(target[key])
 117
 118    params_py = afscgap.query_util.interpret_query_to_py(params)
 119    params_py_items = params_py.items()
 120    params_py_items_given = filter(lambda x: x[1] is not None, params_py_items)
 121    params_py_items_valid = filter(
 122        lambda x: x[0] in HAUL_FILTERABLE_FIELDS,
 123        params_py_items_given
 124    )
 125    params_funcs = map(
 126        lambda x: build_query_function(x[0], x[1]),
 127        params_py_items_valid
 128    )
 129    params_funcs_realized = list(params_funcs)
 130
 131    def check_all(target: afscgap.model.Haul) -> bool:
 132        target_dict = target.to_dict()
 133        not_allowed = filter(
 134            lambda x: not x(target_dict),
 135            params_funcs_realized
 136        )
 137        num_not_allowed = sum(map(lambda x: 1, not_allowed))
 138        return num_not_allowed == 0
 139
 140    return check_all
 141
 142
 143def get_hauls_data(params: dict, requestor: OPT_REQUESTOR = None,
 144    hauls_url: afscgap.client.OPT_STR = None) -> HAUL_LIST:
 145    """Download Hauls from a URL and apply a filter specified by params.
 146
 147    Args:
 148        params: Dictionary of filters to apply to the query where a value of
 149            None means no filter should be applied on that field.
 150        requestor: Strategy to make HTTP GET requests. If None, will default
 151            to requests.get.
 152        hauls_url: The URL at which the Hauls file can be found or None to use
 153            a default. Defaults to None.
 154
 155    Returns:
 156        List of Haul data after having applied the filters described by params.
 157    """
 158    if hauls_url is None:
 159        hauls_url = DEFAULT_HAULS_URL
 160
 161    params_checker = build_params_checker(params)
 162
 163    if requestor is None:
 164        requestor = afscgap.http_util.build_requestor()
 165
 166    response = requestor(hauls_url)
 167    afscgap.http_util.check_result(response)
 168
 169    response.encoding = 'utf-8'
 170    response_io = io.StringIO(response.text, newline='')
 171
 172    response_rows = csv.DictReader(response_io)
 173    response_hauls = map(parse_haul, response_rows)
 174    response_hauls_filtered = filter(params_checker, response_hauls)
 175
 176    return list(response_hauls_filtered)
 177
 178
 179class NegativeInferenceCursorDecorator(afscgap.cursor.Cursor):
 180    """Cursor augmenting another cursor with inferred negative records.
 181
 182    Cursor which exausts an inner cursor and then supplies inferred zero catch
 183    records. Specifically, a Cursor which 1) first iterates over the
 184    inner_cursor and then 2) provides inferred zero catch records (for any hauls
 185    without observed data from inner_cursor for a species).
 186    """
 187
 188    def __init__(self, inner_cursor: afscgap.cursor.Cursor,
 189        hauls_data: HAUL_LIST):
 190        """Decorate a cursor.
 191
 192        Args:
 193            inner_cursor: The cursor to augment and from which to observe
 194                presence data.
 195            hauls_data: Metadata on all hauls relevant to the query.
 196        """
 197        self._inner_cursor = inner_cursor
 198        self._hauls_data = hauls_data
 199
 200        self._started_inference = False
 201        self._inferences_iter: typing.Iterator[afscgap.model.Record] = iter([])
 202
 203        self._species_seen: SPECIES_DICT = dict()
 204        self._species_hauls_seen: typing.Set[str] = set()
 205        self._ak_survey_ids: typing.Dict[str, int] = dict()
 206
 207    def get_base_url(self) -> str:
 208        """Get the URL at which the first page of query results can be found.
 209
 210        Returns:
 211            The URL for the query without pagination information.
 212        """
 213        return self._inner_cursor.get_base_url()
 214
 215    def get_limit(self) -> OPT_INT:
 216        """Get the page size limit.
 217
 218        Returns:
 219            The maximum number of records to return per page.
 220        """
 221        return self._inner_cursor.get_limit()
 222
 223    def get_start_offset(self) -> OPT_INT:
 224        """Get the number of inital records to ignore.
 225
 226        Returns:
 227            The number of records being skipped at the start of the result set.
 228        """
 229        return self._inner_cursor.get_start_offset()
 230
 231    def get_filtering_incomplete(self) -> bool:
 232        """Determine if this cursor is silently filtering incomplete records.
 233
 234        Returns:
 235            Flag indicating if incomplete records should be silently filtered.
 236            If true, they will not be returned during iteration and placed in
 237            the queue at get_invalid(). If false, they will be returned and
 238            those incomplete records' get_complete() will return false.
 239        """
 240        return self._inner_cursor.get_filtering_incomplete()
 241
 242    def get_page_url(self, offset: OPT_INT = None,
 243        limit: OPT_INT = None) -> str:
 244        """Get a URL at which a page can be found using this cursor's base url.
 245
 246        Args:
 247            offset: The number of records to skip prior to the page.
 248            limit: The maximum number of records to return in the page.
 249        Returns:
 250            URL at which the requested page can be found.
 251        """
 252        return self._inner_cursor.get_page_url(offset=offset, limit=limit)
 253
 254    def get_page(self, offset: OPT_INT = None,
 255        limit: OPT_INT = None,
 256        ignore_invalid: bool = False) -> typing.List[afscgap.model.Record]:
 257        """Get a page using this cursor's base url.
 258
 259        Args:
 260            offset: The number of records to skip prior to the page.
 261            limit: The maximum number of records to return in the page.
 262            ignore_invalid: Flag indicating how to handle invalid records. If
 263                true, will silently throw away records which could not be
 264                parsed. If false, will raise an exception if a record can not
 265                be parsed.
 266
 267        Returns:
 268            Results from the page which, regardless of ignore_invalid, may
 269            contain a mixture of complete and incomplete records.
 270        """
 271        return self._inner_cursor.get_page(
 272            offset=offset,
 273            limit=limit,
 274            ignore_invalid=ignore_invalid
 275        )
 276
 277    def get_invalid(self) -> 'queue.Queue[dict]':
 278        """Get a queue of invalid / incomplete records found so far.
 279
 280        Returns:
 281            Queue with dictionaries containing the raw data returned from the
 282            API that did not have valid values for all required fields. Note
 283            that this will include incomplete records as well if
 284            get_filtering_incomplete() is true and will not contain incomplete
 285            records otherwise.
 286        """
 287        return self._inner_cursor.get_invalid()
 288
 289    def to_dicts(self) -> typing.Iterable[dict]:
 290        """Create an iterator which converts Records to dicts.
 291
 292        Returns:
 293            Iterator which returns dictionaries instead of Record objects but
 294            has otherwise the same beahavior as iterating in this Cursor
 295            directly.
 296        """
 297        return self._inner_cursor.to_dicts()
 298
 299    def get_next(self) -> typing.Optional[afscgap.model.Record]:
 300        """Get the next value for this Cursor.
 301
 302        Returns:
 303            The next value waiting if cached in the cursor's results queue or
 304            as just retrieved from a new page gathered by HTTP request. Will
 305            return None if no remain.
 306        """
 307        if self._started_inference:
 308            return self._get_next_inferred()
 309        else:
 310            next_record_maybe = self._inner_cursor.get_next()
 311
 312            if next_record_maybe:
 313                self._record_record_meta(next_record_maybe)
 314                return next_record_maybe
 315            else:
 316                self._start_inference()
 317                return self._get_next_inferred()
 318
 319    def _record_record_meta(self, record: afscgap.model.Record):
 320        """Record metadata from a record, indicating a haul / species was seen.
 321
 322        Args:
 323            record: The record observed.
 324        """
 325        key_with_species = self._get_haul_key(
 326            record,
 327            species=record.get_scientific_name()
 328        )
 329        self._species_hauls_seen.add(key_with_species)
 330
 331        scientific_name = record.get_scientific_name()
 332        common_name = record.get_common_name()
 333        species_code = record.get_species_code()
 334        tsn = record.get_tsn_maybe()
 335
 336        self._species_seen[scientific_name] = afscgap.model.SpeciesRecord(
 337            scientific_name,
 338            common_name,
 339            species_code,
 340            tsn
 341        )
 342
 343        survey = record.get_survey()
 344        ak_survey_id = record.get_ak_survey_id()
 345
 346        self._ak_survey_ids[survey] = ak_survey_id
 347
 348    def _get_haul_key(self, record: afscgap.model.HaulKeyable,
 349        species: OPT_STR = None) -> str:
 350        """Get a string uniquely identifying an individual haul.
 351
 352        Args:
 353            record: The record from which to derive a haul key.
 354            speices: If given, include the species in the key. If not given, the
 355                key will refer to the entire haul across all species. Note that
 356                this should be the scientific name for a species.
 357
 358        Returns:
 359            String uniquely identifying a haul across the entire dataset.
 360        """
 361        ship_info_vals = [
 362            record.get_year(),
 363            record.get_vessel_id(),
 364            record.get_cruise(),
 365            record.get_haul()
 366        ]
 367        ship_info_vals_int = map(lambda x: round(x), ship_info_vals)
 368        ship_info_vals_str = map(str, ship_info_vals_int)
 369        ship_info_vals_csv = ','.join(ship_info_vals_str)
 370
 371        without_species = '%s:%s' % (record.get_srvy(), ship_info_vals_csv)
 372
 373        if species:
 374            return '%s/%s' % (without_species, species)
 375        else:
 376            return without_species
 377
 378    def _start_inference(self):
 379        """Prepare to start inferrence.
 380
 381        Indicate that the inner cursor is exhaused, preparing to run inferrence.
 382        """
 383        hauls_seen_with_key = map(
 384            lambda x: (self._get_haul_key(x), x),
 385            self._hauls_data
 386        )
 387        hauls_seen_by_key = dict(hauls_seen_with_key)
 388
 389        scientific_names_seen = self._species_seen.keys()
 390        missing_keys = self._get_missing_keys(
 391            hauls_seen_by_key.keys(),
 392            scientific_names_seen,
 393            self._species_hauls_seen
 394        )
 395
 396        missing_haul_keys_and_species_tuple = map(
 397            lambda x: x.split('/'),
 398            missing_keys
 399        )
 400        missing_haul_keys_and_species = map(
 401            lambda x: {'haulKey': x[0], 'species': x[1]},
 402            missing_haul_keys_and_species_tuple
 403        )
 404        missing_hauls_and_species = map(
 405            lambda x: {
 406                'haul': hauls_seen_by_key[x['haulKey']],
 407                'species': x['species']
 408            },
 409            missing_haul_keys_and_species
 410        )
 411
 412        def make_inference_record(target: typing.Dict) -> afscgap.model.Record:
 413            scientific_name = target['species']
 414            haul = target['haul']
 415
 416            species_record = self._species_seen[scientific_name]
 417            common_name = species_record.get_common_name()
 418            species_code = species_record.get_species_code()
 419            tsn = species_record.get_tsn()
 420
 421            ak_survey_id = self._ak_survey_ids.get(haul.get_survey(), None)
 422
 423            return ZeroCatchHaulDecorator(
 424                haul,
 425                scientific_name,
 426                common_name,
 427                species_code,
 428                tsn,
 429                ak_survey_id
 430            )
 431
 432        inference_map = map(make_inference_record, missing_hauls_and_species)
 433
 434        self._inferences_iter = iter(inference_map)
 435        self._started_inference = True
 436
 437    def _get_next_inferred(self) -> typing.Optional[afscgap.model.Record]:
 438        """Get the next inferred zero catch record.
 439
 440        Raises:
 441            StopIteration: Raised if no records left to infer.
 442            AssertionError: Raised if the cursor has not yet started inference.
 443
 444        Returns:
 445            Next inferred absence data record.
 446        """
 447        assert self._started_inference
 448
 449        try:
 450            return next(self._inferences_iter)
 451        except StopIteration:
 452            return None
 453
 454    def _get_missing_keys(self, hauls_seen: typing.Iterable[str],
 455        scientific_names_seen: typing.Iterable[str],
 456        species_hauls_seen: typing.Set[str]) -> typing.Iterable[str]:
 457        """Determine which species haul keys were expected but not observed.
 458
 459        Args:
 460            hauls_seen: The haus seen (non-species keys).
 461            scientific_names_seen: The name of the scientific names for species
 462                observed across the entire dataset yielded by the user query.
 463            species_hauls_seen: The haul / species keys or combinations actually
 464                observed.
 465
 466        Returns:
 467            Haul / species keys expected but not found in species_hauls_seen
 468            given the hauls described in hauls_seen and the species seen in
 469            scientific_names_seen.
 470        """
 471        hauls_with_names = itertools.product(
 472            hauls_seen,
 473            scientific_names_seen
 474        )
 475        hauls_with_names_str = map(lambda x: '%s/%s' % x, hauls_with_names)
 476        missing_keys = filter(
 477            lambda x: x not in species_hauls_seen,
 478            hauls_with_names_str
 479        )
 480        return missing_keys
 481
 482
 483class ZeroCatchHaulDecorator(afscgap.model.Record):
 484    """Decorator for a Haul that makes it operate like a zero catch Record."""
 485
 486    def __init__(self, haul: afscgap.model.Haul, scientific_name: str,
 487        common_name: str, species_code: float, tsn: OPT_INT,
 488        ak_survey_id: OPT_INT):
 489        """Decorate a Haul to conform to the Record interface.
 490
 491        Args:
 492            haul: The haul to decorate.
 493            scientific_name: The scientific name of the species to be associated
 494                with this record.
 495            common_name: The common name of the species to be associated with
 496                this record.
 497            species_code: The species code of the species to be associated with
 498                this record.
 499            tsn: The taxonomic information system species code to be associated
 500                with this record if known.
 501            ak_survey_id: The AK survey ID to be associated with this record if
 502                known.
 503        """
 504        self._haul = haul
 505        self._scientific_name = scientific_name
 506        self._common_name = common_name
 507        self._species_code = species_code
 508        self._tsn = tsn
 509        self._ak_survey_id = ak_survey_id
 510
 511    def get_year(self) -> float:
 512        """Get the year of the start date for the haul.
 513
 514        Returns:
 515            Year for the haul.
 516        """
 517        return self._haul.get_year()
 518
 519    def get_srvy(self) -> str:
 520        """Get the field labeled as srvy in the API.
 521
 522        Returns:
 523            The name of the survey in which this haul was conducted. NBS (N
 524            Bearing Sea), EBS (SE Bearing Sea), BSS (Bearing Sea Slope), or GOA
 525            (Gulf of Alaska)
 526        """
 527        return self._haul.get_srvy()
 528
 529    def get_survey(self) -> str:
 530        """Get the field labeled as survey in the API.
 531
 532        Returns:
 533            Long form description of the survey in which the haul was conducted.
 534        """
 535        return self._haul.get_survey()
 536
 537    def get_survey_id(self) -> float:
 538        """Get the field labeled as survey_id in the API.
 539
 540        Returns:
 541            Unique numeric ID for the survey.
 542        """
 543        return self._haul.get_survey_id()
 544
 545    def get_cruise(self) -> float:
 546        """Get the field labeled as cruise in the API.
 547
 548        Returns:
 549            An ID uniquely identifying the cruise in which the haul was made.
 550            Multiple cruises in a survey.
 551        """
 552        return self._haul.get_cruise()
 553
 554    def get_haul(self) -> float:
 555        """Get the field labeled as haul in the API.
 556
 557        Returns:
 558            An ID uniquely identifying the haul. Multiple hauls per cruises.
 559        """
 560        return self._haul.get_haul()
 561
 562    def get_stratum(self) -> float:
 563        """Get the field labeled as stratum in the API.
 564
 565        Returns:
 566            Unique ID for statistical area / survey combination as described in
 567            the metadata or 0 if an experimental tow.
 568        """
 569        return self._haul.get_stratum()
 570
 571    def get_station(self) -> str:
 572        """Get the field labeled as station in the API.
 573
 574        Returns:
 575            Station associated with the survey.
 576        """
 577        return self._haul.get_station()
 578
 579    def get_vessel_name(self) -> str:
 580        """Get the field labeled as vessel_name in the API.
 581
 582        Returns:
 583            Unique ID describing the vessel that made this haul. Note this is
 584            left as a string but, in practice, is likely numeric.
 585        """
 586        return self._haul.get_vessel_name()
 587
 588    def get_vessel_id(self) -> float:
 589        """Get the field labeled as vessel_id in the API.
 590
 591        Returns:
 592            Name of the vessel at the time the haul was made. Note that there
 593            may be multiple names potentially associated with a vessel ID.
 594        """
 595        return self._haul.get_vessel_id()
 596
 597    def get_date_time(self) -> str:
 598        """Get the field labeled as date_time in the API.
 599
 600        Returns:
 601            The date and time of the haul which has been attempted to be
 602            transformed to an ISO 8601 string without timezone info. If it
 603            couldn’t be transformed, the original string is reported.
 604        """
 605        return self._haul.get_date_time()
 606
 607    def get_latitude(self, units: str = 'dd') -> float:
 608        """Get the field labeled as latitude_dd in the API.
 609
 610        Args:
 611            units: The units to return this value in. Only supported is dd for
 612                degrees. Deafults to dd.
 613
 614        Returns:
 615            Latitude in decimal degrees associated with the haul.
 616        """
 617        return afscgap.model.assert_float_present(
 618            afscgap.convert.convert_degrees(
 619                self._haul.get_latitude_dd(),
 620                units
 621            )
 622        )
 623
 624    def get_longitude(self, units: str = 'dd') -> float:
 625        """Get the field labeled as longitude_dd in the API.
 626
 627        Args:
 628            units: The units to return this value in. Only supported is dd for
 629                degrees. Deafults to dd.
 630
 631        Returns:
 632            Longitude in decimal degrees associated with the haul.
 633        """
 634        return afscgap.model.assert_float_present(
 635            afscgap.convert.convert_degrees(
 636                self._haul.get_longitude_dd(),
 637                units
 638            )
 639        )
 640
 641    def get_species_code(self) -> float:
 642        """Get the field labeled as species_code in the API.
 643
 644        Returns:
 645            Unique ID associated with the species observed.
 646        """
 647        return self._species_code
 648
 649    def get_common_name(self) -> str:
 650        """Get the field labeled as common_name in the API.
 651
 652        Returns:
 653            The “common name” associated with the species observed. Example:
 654            Pacific glass shrimp.
 655        """
 656        return self._common_name
 657
 658    def get_scientific_name(self) -> str:
 659        """Get the field labeled as scientific_name in the API.
 660
 661        Returns:
 662            The “scientific name” associated with the species observed. Example:
 663            Pasiphaea pacifica.
 664        """
 665        return self._scientific_name
 666
 667    def get_taxon_confidence(self) -> str:
 668        """Get rating of taxon identification confidence.
 669
 670        Returns:
 671            Always returns Unassessed.
 672        """
 673        return 'Unassessed'
 674
 675    def get_cpue_weight_maybe(self, units: str = 'kg/ha') -> OPT_FLOAT:
 676        """Get a field labeled as cpue_* in the API.
 677
 678        Args:
 679            units: The desired units for the catch per unit effort. Options:
 680                kg/ha, kg/km2, kg1000/km2. Defaults to kg/ha.
 681
 682        Returns:
 683            Catch weight divided by net area (in given units) if available. See
 684            metadata. None if could not interpret as a float. If an inferred
 685            zero catch record, will be zero.
 686        """
 687        return 0
 688
 689    def get_cpue_count_maybe(self, units: str = 'kg/ha') -> OPT_FLOAT:
 690        """Get the field labeled as cpue_* in the API.
 691
 692        Get the catch per unit effort from the record with one of the following
 693        units: kg/ha, kg/km2, kg1000/km2.
 694
 695        Args:
 696            units: The desired units for the catch per unit effort. Options:
 697                count/ha, count/km2, and count1000/km2. Defaults to count/ha.
 698
 699        Returns:
 700            Catch weight divided by net area (in given units) if available. See
 701            metadata. None if could not interpret as a float. If an inferred
 702            zero catch record, will be zero.
 703        """
 704        return 0
 705
 706    def get_weight_maybe(self, units='kg') -> OPT_FLOAT:
 707        """Get the field labeled as weight_kg in the API.
 708
 709        Args:
 710            units: The units in which the weight should be returned. Options are
 711                g, kg for grams and kilograms respectively. Deafults to kg.
 712
 713        Returns:
 714            Taxon weight if available. See metadata. None if could not
 715            interpret as a float. If an inferred zero catch record, will be
 716            zero.
 717        """
 718        return 0
 719
 720    def get_count_maybe(self) -> OPT_FLOAT:
 721        """Get total number of organism individuals in haul.
 722
 723        Returns:
 724            Always returns 0.
 725        """
 726        return 0
 727
 728    def get_bottom_temperature_maybe(self, units: str = 'c') -> OPT_FLOAT:
 729        """Get the field labeled as bottom_temperature_c in the API.
 730
 731        Args:
 732            units: The units in which the temperature should be returned.
 733                Options: c or f for Celcius and Fahrenheit respectively.
 734                Defaults to c.
 735
 736        Returns:
 737            Bottom temperature associated with observation / inferrence if
 738            available in desired units. None if not given or could not interpret
 739            as a float.
 740        """
 741        return afscgap.convert.convert_temperature(
 742            self._haul.get_bottom_temperature_c_maybe(),
 743            units
 744        )
 745
 746    def get_surface_temperature_maybe(self, units: str = 'c') -> OPT_FLOAT:
 747        """Get the field labeled as surface_temperature_c in the API.
 748
 749        Args:
 750            units: The units in which the temperature should be returned.
 751                Options: c or f for Celcius and Fahrenheit respectively.
 752                Defaults to c.
 753
 754        Returns:
 755            Surface temperature associated with observation / inferrence if
 756            available. None if not given or could not interpret as a float.
 757        """
 758        return afscgap.convert.convert_temperature(
 759            self._haul.get_surface_temperature_c_maybe(),
 760            units
 761        )
 762
 763    def get_depth(self, units: str = 'm') -> float:
 764        """Get the field labeled as depth_m in the API.
 765
 766        Args:
 767            units: The units in which the distance should be returned. Options:
 768                m or km for meters and kilometers respectively. Defaults to m.
 769
 770        Returns:
 771            Depth of the bottom.
 772        """
 773        return afscgap.model.assert_float_present(
 774            afscgap.convert.convert_distance(self._haul.get_depth_m(), units)
 775        )
 776
 777    def get_distance_fished(self, units: str = 'm') -> float:
 778        """Get the field labeled as distance_fished_km in the API.
 779
 780        Args:
 781            units: The units in which the distance should be returned. Options:
 782                m or km for meters and kilometers respectively. Defaults to km.
 783
 784        Returns:
 785            Distance of the net fished.
 786        """
 787        return afscgap.model.assert_float_present(
 788            afscgap.convert.convert_distance(
 789                self._haul.get_distance_fished_km() * 1000,
 790                units
 791            )
 792        )
 793
 794    def get_net_width_maybe(self, units: str = 'm') -> OPT_FLOAT:
 795        """Get the field labeled as net_width_m in the API.
 796
 797        Args:
 798            units: The units in which the distance should be returned. Options:
 799                m or km for meters and kilometers respectively. Defaults to m.
 800
 801        Returns:
 802            Distance of the net fished or None if not given.
 803        """
 804        return afscgap.convert.convert_distance(
 805            self._haul.get_net_width_m_maybe(),
 806            units
 807        )
 808
 809    def get_net_height_maybe(self, units: str = 'm') -> OPT_FLOAT:
 810        """Get the field labeled as net_height_m in the API.
 811
 812        Args:
 813            units: The units in which the distance should be returned. Options:
 814                m or km for meters and kilometers respectively. Defaults to m.
 815
 816        Returns:
 817            Height of the net fished or None if not given.
 818        """
 819        return afscgap.convert.convert_distance(
 820            self._haul.get_net_height_m_maybe(),
 821            units
 822        )
 823
 824    def get_net_width(self, units: str = 'm') -> float:
 825        """Get the field labeled as net_width_m in the API.
 826
 827        Args:
 828            units: The units in which the distance should be returned. Options:
 829                m or km for meters and kilometers respectively. Defaults to m.
 830
 831        Returns:
 832            Distance of the net fished after asserting it is given.
 833        """
 834        return afscgap.model.assert_float_present(
 835            self.get_net_width_maybe(units=units)
 836        )
 837
 838    def get_net_height(self, units: str = 'm') -> float:
 839        """Get the field labeled as net_height_m in the API.
 840
 841        Args:
 842            units: The units in which the distance should be returned. Options:
 843                m or km for meters and kilometers respectively. Defaults to m.
 844
 845        Returns:
 846            Height of the net fished after asserting it is given.
 847        """
 848        return afscgap.model.assert_float_present(
 849            self.get_net_height_maybe(units=units)
 850        )
 851
 852    def get_area_swept(self, units: str = 'ha') -> float:
 853        """Get the field labeled as area_swept_ha in the API.
 854
 855        Args:
 856            units: The units in which the area should be returned. Options:
 857                ha, m2, km2. Defaults to ha.
 858
 859        Returns:
 860            Area covered by the net while fishing in desired units.
 861        """
 862        return afscgap.model.assert_float_present(
 863            afscgap.convert.convert_area(
 864                self._haul.get_area_swept_ha(),
 865                units
 866            )
 867        )
 868
 869    def get_duration(self, units: str = 'hr') -> float:
 870        """Get the field labeled as duration_hr in the API.
 871
 872        Args:
 873            units: The units in which the duration should be returned. Options:
 874                day, hr, min. Defaults to hr.
 875
 876        Returns:
 877            Duration of the haul.
 878        """
 879        return afscgap.model.assert_float_present(
 880            afscgap.convert.convert_time(self._haul.get_duration_hr(), units)
 881        )
 882
 883    def get_tsn(self) -> int:
 884        """Get taxonomic information system species code.
 885
 886        Returns:
 887            TSN for species.
 888        """
 889        return afscgap.model.assert_int_present(self._tsn)
 890
    def get_tsn_maybe(self) -> OPT_INT:
        """Get taxonomic information system species code without asserting it.

        Returns:
            TSN for species exactly as stored on this record. No presence check
            is made here; use get_tsn for the asserting variant.
        """
        return self._tsn
 898
 899    def get_ak_survey_id(self) -> int:
 900        """Get the field labeled as ak_survey_id in the API.
 901
 902        Returns:
 903            AK survey ID if found.
 904        """
 905        return afscgap.model.assert_int_present(self._ak_survey_id)
 906
    def get_ak_survey_id_maybe(self) -> OPT_INT:
        """Get the field labeled as ak_survey_id in the API without asserting.

        Returns:
            AK identifier for the survey exactly as stored on this record, or
            None if not given.
        """
        return self._ak_survey_id
 914
    def get_cpue_weight(self, units: str = 'kg/ha') -> float:
        """Get the weight-based catch per unit effort (field cpue_kgha).

        This implementation returns a constant zero without consulting any
        stored data, so it performs no validity assertion and cannot raise.

        Args:
            units: The desired units for the catch per unit effort. Options:
                kg/ha, kg/km2, kg1000/km2. Defaults to kg/ha. Not used by this
                implementation since the result is zero regardless of units.

        Returns:
            Catch weight divided by net area. Always returns 0.
        """
        return 0
 931
    def get_cpue_count(self, units: str = 'count/ha') -> float:
        """Get the count-based catch per unit effort (field cpue_noha).

        This implementation returns a constant zero without consulting any
        stored data, so it performs no validity assertion and cannot raise.

        Args:
            units: The desired units for the catch per unit effort. Options:
                count/ha, count/km2, and count1000/km2. Defaults to count/ha.
                Not used by this implementation since the result is zero
                regardless of units.

        Returns:
            Catch number divided by net sweep area. Always returns 0.
        """
        return 0
 948
    def get_weight(self, units: str = 'kg') -> float:
        """Get the taxon weight (field weight_kg).

        This implementation returns a constant zero without consulting any
        stored data, so it performs no validity assertion and cannot raise.

        Args:
            units: The units in which the weight should be returned. Options are
                g, kg for grams and kilograms respectively. Defaults to kg. Not
                used by this implementation since the result is zero regardless
                of units.

        Returns:
            Taxon weight. Always returns 0.
        """
        return 0
 964
    def get_count(self) -> float:
        """Get the value of field count.

        This implementation returns a constant zero without consulting any
        stored data, so it performs no validity assertion and cannot raise.

        Returns:
            Always returns 0.
        """
        return 0
 976
 977    def get_bottom_temperature(self, units='c') -> float:
 978        """Get the value of field bottom_temperature_c with validity assert.
 979
 980        Args:
 981            units: The units in which the temperature should be returned.
 982                Options: c or f for Celcius and Fahrenheit respectively.
 983                Defaults to c.
 984
 985        Raises:
 986            AssertionError: Raised if this field was not given by the API or
 987            could not be parsed as expected.
 988
 989        Returns:
 990            Bottom temperature associated with observation / inferrence if
 991            available.
 992        """
 993        return afscgap.model.assert_float_present(
 994            self.get_bottom_temperature_maybe(units=units)
 995        )
 996
 997    def get_surface_temperature(self, units='c') -> float:
 998        """Get the value of field surface_temperature_c with validity assert.
 999
1000        Args:
1001            units: The units in which the temperature should be returned.
1002                Options: c or f for Celcius and Fahrenheit respectively.
1003                Defaults to c.
1004
1005        Raises:
1006            AssertionError: Raised if this field was not given by the API or
1007            could not be parsed as expected.
1008
1009        Returns:
1010            Surface temperature associated with observation / inferrence if
1011            available.
1012        """
1013        return afscgap.model.assert_float_present(
1014            self.get_surface_temperature_maybe(units=units)
1015        )
1016
1017    def is_complete(self) -> bool:
1018        """Determine if this record has all of its values filled in.
1019
1020        Returns:
1021            True if all optional fields have a parsed value with the expected
1022            type and false otherwise.
1023        """
1024        tsn_given = self._tsn is not None
1025        ak_survey_id_given = self._ak_survey_id is not None
1026        return tsn_given and ak_survey_id_given and self._haul.is_complete()
1027
1028
def parse_haul(target: dict) -> afscgap.model.Haul:
    """Build a Haul from one row of the community Hauls flat file.

    Columns are looked up by their header names and converted one after
    another, so a missing column or an unparseable required value fails on the
    first offending field.

    Args:
        target: Dict describing a single row from the community-maintained
            Hauls flat file, keyed by column header.

    Returns:
        Haul record constructed from the input row.
    """
    # Values are listed in the positional order the Haul constructor receives
    # them. Temperatures and net dimensions go through get_opt_float rather
    # than float.
    fields = (
        str(target['Srvy']),
        str(target['Survey']),
        float(target['Survey Id']),
        float(target['Cruise']),
        float(target['Haul']),
        float(target['Stratum']),
        str(target['Station']),
        str(target['Vessel Name']),
        float(target['Vessel Id']),
        str(afscgap.convert.convert_to_iso8601(target['Date Time'])),
        float(target['Latitude Dd']),
        float(target['Longitude Dd']),
        afscgap.model.get_opt_float(target['Bottom Temperature C']),
        afscgap.model.get_opt_float(target['Surface Temperature C']),
        float(target['Depth M']),
        float(target['Distance Fished Km']),
        afscgap.model.get_opt_float(target['Net Width M']),
        afscgap.model.get_opt_float(target['Net Height M']),
        float(target['Area Swept Ha']),
        float(target['Duration Hr']),
    )

    return afscgap.model.Haul(*fields)

# URL of the community Hauls flat file used by get_hauls_data when the caller
# does not supply a hauls_url.
DEFAULT_HAULS_URL = 'https://pyafscgap.org/community/hauls.csv'

# Type alias: mapping from a string key to a species record.
SPECIES_DICT = typing.Dict[str, afscgap.model.SpeciesRecord]

# Type alias: list of Haul records, as returned by get_hauls_data.
HAUL_LIST = typing.List[afscgap.model.Haul]

# Type alias: an optional list of Haul records, as accepted for hauls_prefetch.
OPT_HAUL_LIST = typing.Optional[typing.List[afscgap.model.Haul]]

# Query parameter names that build_params_checker will apply to a Haul; other
# parameter names are ignored when filtering hauls.
HAUL_FILTERABLE_FIELDS = ['year', 'srvy', 'survey', 'survey_id', 'cruise', 'haul', 'stratum', 'station', 'vessel_name', 'vessel_id', 'date_time', 'latitude_dd', 'longitude_dd', 'bottom_temperature_c', 'surface_temperature_c', 'depth_m', 'distance_fished_km', 'net_width_m', 'net_height_m', 'area_swept_ha', 'duration_hr']

# Type alias: predicate taking a Haul and returning whether to include it.
PARAMS_CHECKER = typing.Callable[[afscgap.model.Haul], bool]

def build_inference_cursor( params: dict, inner_cursor: afscgap.cursor.Cursor, requestor: Optional[Callable[[str], requests.models.Response]] = None, hauls_url: Optional[str] = None, hauls_prefetch: Optional[List[afscgap.model.Haul]] = None): View Source

 62def build_inference_cursor(params: dict, inner_cursor: afscgap.cursor.Cursor,
 63    requestor: OPT_REQUESTOR = None, hauls_url: afscgap.client.OPT_STR = None,
 64    hauls_prefetch: OPT_HAUL_LIST = None):
 65    """Build a cursor which infers zero catch records.
 66
 67    Args:
 68        params: Dictionary of filters to apply to the query where a value of
 69            None means no filter should be applied on that field.
 70        inner_cursor: Cursor which yields records which, when appearing, should
 71            not be later inferred as zero catch records.
 72        requestor: Strategy to make HTTP GET requests. If None, will default
 73            to requests.get.
 74        hauls_url: The URL at which the Hauls file can be found or None to use
 75            a default. Defaults to None.
 76        hauls_prefetch: List of hauls data to use. If None, will request from
 77            hauls_url. If not None, will use this instead.
 78
 79    Returns:
 80        Cursor which 1) first iterates over the inner_cursor and then
 81        2) provides inferred zero catch records (for any hauls without observed
 82        data from inner_cursor for a species).
 83    """
 84    params_safe = copy.deepcopy(params)
 85
 86    if 'date_time' in params_safe:
 87        params_safe['date_time'] = afscgap.convert.convert_from_iso8601(
 88            params_safe['date_time']
 89        )
 90
 91    if hauls_prefetch is not None:
 92        hauls_data = hauls_prefetch
 93    else:
 94        hauls_data = get_hauls_data(
 95            params_safe,
 96            requestor=requestor,
 97            hauls_url=hauls_url
 98        )
 99
100    return NegativeInferenceCursorDecorator(inner_cursor, hauls_data)

Build a cursor which infers zero catch records.

Arguments:

params: Dictionary of filters to apply to the query where a value of None means no filter should be applied on that field.
inner_cursor: Cursor which yields records which, when appearing, should not be later inferred as zero catch records.
requestor: Strategy to make HTTP GET requests. If None, will default to requests.get.
hauls_url: The URL at which the Hauls file can be found or None to use a default. Defaults to None.
hauls_prefetch: List of hauls data to use. If None, will request from hauls_url. If not None, will use this instead.

Returns:

Cursor which 1) first iterates over the inner_cursor and then 2) provides inferred zero catch records (for any hauls without observed data from inner_cursor for a species).

def build_params_checker(params: dict) -> Callable[[afscgap.model.Haul], bool]: View Source

def build_params_checker(params: dict) -> PARAMS_CHECKER:
    """Build a function that checks if a single Haul record should be filtered.

    Args:
        params: Dictionary of filters to apply to the query where a value of
            None means no filter should be applied on that field.

    Returns:
        Function which returns true if the record given to it should be included
        (is not filtered out) or false if it should be excluded from results to
        meet a query requirement (is filtered out).
    """

    def build_query_function(key: str, checker):
        # Bind key and checker through a function call so that each lambda
        # keeps its own pair rather than sharing the loop variables.
        return lambda target: checker(target[key])

    params_py = afscgap.query_util.interpret_query_to_py(params)

    # Keep only filters that were actually given and that apply to hauls.
    field_checks = [
        build_query_function(key, checker)
        for key, checker in params_py.items()
        if checker is not None and key in HAUL_FILTERABLE_FIELDS
    ]

    def check_all(target: afscgap.model.Haul) -> bool:
        target_dict = target.to_dict()
        # all() stops at the first failing filter instead of counting every
        # failure; an empty filter list accepts every haul.
        return all(check(target_dict) for check in field_checks)

    return check_all

Build a function that checks if a single Haul record should be filtered.

Arguments:

params: Dictionary of filters to apply to the query where a value of None means no filter should be applied on that field.

Returns:

Function which returns true if the record given to it should be included (is not filtered out) or false if it should be excluded from results to meet a query requirement (is filtered out).

def get_hauls_data( params: dict, requestor: Optional[Callable[[str], requests.models.Response]] = None, hauls_url: Optional[str] = None) -> List[afscgap.model.Haul]: View Source

def get_hauls_data(params: dict, requestor: OPT_REQUESTOR = None,
    hauls_url: afscgap.client.OPT_STR = None) -> HAUL_LIST:
    """Fetch the Hauls flat file and keep the rows matching params.

    Args:
        params: Dictionary of filters to apply to the query where a value of
            None means no filter should be applied on that field.
        requestor: Strategy to make HTTP GET requests. If None, will default
            to requests.get.
        hauls_url: The URL at which the Hauls file can be found or None to use
            a default. Defaults to None.

    Returns:
        List of Haul data after having applied the filters described by params.
    """
    url = DEFAULT_HAULS_URL if hauls_url is None else hauls_url

    include_haul = build_params_checker(params)

    if requestor is None:
        fetch = afscgap.http_util.build_requestor()
    else:
        fetch = requestor

    response = fetch(url)
    afscgap.http_util.check_result(response)

    # Set the encoding before reading the body text so it decodes as UTF-8.
    response.encoding = 'utf-8'
    rows = csv.DictReader(io.StringIO(response.text, newline=''))

    return [haul for haul in map(parse_haul, rows) if include_haul(haul)]

Download Hauls from a URL and apply a filter specified by params.

Arguments:

params: Dictionary of filters to apply to the query where a value of None means no filter should be applied on that field.
requestor: Strategy to make HTTP GET requests. If None, will default to requests.get.
hauls_url: The URL at which the Hauls file can be found or None to use a default. Defaults to None.

Returns:

List of Haul data after having applied the filters described by params.

def parse_haul(target: dict) -> afscgap.model.Haul: View Source

def parse_haul(target: dict) -> afscgap.model.Haul:
    """Build a Haul from one row of the community Hauls flat file.

    Columns are looked up by their header names and converted one after
    another, so a missing column or an unparseable required value fails on the
    first offending field.

    Args:
        target: Dict describing a single row from the community-maintained
            Hauls flat file, keyed by column header.

    Returns:
        Haul record constructed from the input row.
    """
    # Values are listed in the positional order the Haul constructor receives
    # them. Temperatures and net dimensions go through get_opt_float rather
    # than float.
    fields = (
        str(target['Srvy']),
        str(target['Survey']),
        float(target['Survey Id']),
        float(target['Cruise']),
        float(target['Haul']),
        float(target['Stratum']),
        str(target['Station']),
        str(target['Vessel Name']),
        float(target['Vessel Id']),
        str(afscgap.convert.convert_to_iso8601(target['Date Time'])),
        float(target['Latitude Dd']),
        float(target['Longitude Dd']),
        afscgap.model.get_opt_float(target['Bottom Temperature C']),
        afscgap.model.get_opt_float(target['Surface Temperature C']),
        float(target['Depth M']),
        float(target['Distance Fished Km']),
        afscgap.model.get_opt_float(target['Net Width M']),
        afscgap.model.get_opt_float(target['Net Height M']),
        float(target['Area Swept Ha']),
        float(target['Duration Hr']),
    )

    return afscgap.model.Haul(*fields)

Parse a Haul record from a row in the community Hauls flat file.

Arguments:

target: Dict describing a single row from the community-maintained Hauls flat file.

Returns:

Haul record constructed from the input row.