diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ad13caeb..4a5a67a6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -116,6 +116,7 @@ jobs: merge: runs-on: ubuntu-latest + if: ${{ ! github.event.pull_request.head.repo.fork }} permissions: contents: read packages: write diff --git a/.gitleaks.toml b/.gitleaks.toml deleted file mode 100644 index cddfcd7c..00000000 --- a/.gitleaks.toml +++ /dev/null @@ -1,55 +0,0 @@ -title = "gitleaks config" -[[rules]] - description = "AWS Manager ID" - regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}''' - tags = ["key", "AWS"] -[[rules]] - description = "AWS Secret Key" - regex = '''(?i)aws(.{0,20})?(?-i)[0-9a-zA-Z\/+]{40}''' - tags = ["key", "AWS"] -[[rules]] - description = "AWS MWS key" - regex = '''amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}''' - tags = ["key", "AWS", "MWS"] -[[rules]] - description = "Github" - regex = '''(?i)github(.{0,20})?(?-i)[0-9a-zA-Z]{35,40}''' - tags = ["key", "Github"] -[[rules]] - description = "Asymmetric Private Key" - regex = '''-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----''' - tags = ["key", "AsymmetricPrivateKey"] -[[rules]] - description = "Generic Credential" - regex = '''(?i)(api_key|apikey|secret|password|pass|pw|key)(.{0,20})?[0-9a-zA-Z]{16,45}''' - tags = ["key", "API", "generic"] - [[rules.whitelist]] - regex = '''KeyChecking.no.*''' - description = "Ignore ssh settings for GitLab tools-bot" -[[rules]] - description = "Google API key" - regex = '''AIza[0-9A-Za-z\\-_]{35}''' - tags = ["key", "Google"] -[[rules]] - description = "Google (GCP) Service Account" - regex = '''"type": "service_account"''' - tags = ["key", "Google"] -[[rules]] - description = "Heroku API key" - regex = '''(?i)heroku(.{0,20})?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}''' - tags = ["key", "Heroku"] -[[rules]] - description = "MailChimp API key" - regex = 
'''(?i)(mailchimp|mc)(.{0,20})?[0-9a-f]{32}-us[0-9]{1,2}''' - tags = ["key", "Mailchimp"] -[[rules]] - description = "Mailgun API key" - regex = '''((?i)(mailgun|mg)(.{0,20})?)?key-[0-9a-z]{32}''' - tags = ["key", "Mailgun"] -[[rules]] - description = "Slack Webhook" - regex = '''https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}''' - tags = ["key", "slack"] -[whitelist] - description = "Whitelisted files" - files = ['''(^.*gitleaks.toml$|(.*?)(jpg|gif|doc|pdf|bin)$)'''] diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f9ec429..eac621a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.28.2] + +### Added +* Support for Sentinel-2 L2A COG processing via the AWS Element84 API. +* Option to force the regeneration of Sentinel-1 static layers. + +### Changed +* `process.get_s2_metadata` now dynamically routes to either the Google Cloud Storage L1C workflow or the new AWS L2A workflow based on the scene name format. + +### Fixed +* The Sentinel-1 workflow now correctly uses the chip size and search range overrides. 
+ ## [0.28.1] ### Changed diff --git a/src/hyp3_autorift/process.py b/src/hyp3_autorift/process.py index ad7ec062..e257d2f9 100644 --- a/src/hyp3_autorift/process.py +++ b/src/hyp3_autorift/process.py @@ -143,7 +143,50 @@ def get_raster_bbox(path: str): ] -def get_s2_metadata(scene_name): +def get_s2_l2a_metadata(scene_name: str) -> dict: + url = f'https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/{scene_name}' + + response = requests.get(url, timeout=30) # requests never times out by default; bound the wait so a stalled connection cannot hang the job + response.raise_for_status() + item = response.json() + + band_url = item['assets']['nir']['href'] + + if band_url.startswith('s3://'): + vsi_path = band_url.replace('s3://', '/vsis3/') + elif band_url.startswith('https://'): + vsi_path = '/vsicurl/' + band_url + else: + vsi_path = band_url + + bbox = item.get('bbox') + if not bbox: + bbox = get_raster_bbox(vsi_path) + + raw_dt = item['properties']['datetime'] + try: + dt_obj = datetime.strptime(raw_dt, '%Y-%m-%dT%H:%M:%S.%fZ') + except ValueError: + dt_obj = datetime.strptime(raw_dt, '%Y-%m-%dT%H:%M:%SZ') + + clean_dt = dt_obj.strftime('%Y-%m-%dT%H:%M:%SZ') + + return { + 'path': vsi_path, + 'bbox': bbox, + 'id': scene_name, + 'properties': {'datetime': clean_dt, 'proj:epsg': item['properties'].get('proj:epsg')}, + } + + +def get_s2_metadata(scene_name: str) -> dict: + """Routes the metadata request based on the scene name format.""" + + # Element84 L2A STAC items + if scene_name.endswith('_L2A') or 'L2A' in scene_name: + return get_s2_l2a_metadata(scene_name) + + # Google Cloud L1C .SAFE items path = get_s2_path(scene_name) bbox = get_raster_bbox(path) acquisition_start = datetime.strptime(scene_name.split('_')[2], '%Y%m%dT%H%M%S') @@ -304,6 +347,7 @@ def process( naming_scheme: Literal['ITS_LIVE_OD', 'ITS_LIVE_PROD'] = 'ITS_LIVE_OD', publish_bucket: str = '', use_static_files: bool = True, + regenerate_static_files: bool = False, frame_id: str | None = None, ) -> Tuple[Path, Path, Path]: """Process a Sentinel-1, Sentinel-2, or Landsat-8 image 
pair @@ -316,7 +360,8 @@ def process( search_range: (Optional) Specify a search range in pixels (e.g., 32 or 64). Overrides parameter-file defaults. naming_scheme: Naming scheme to use for product files publish_bucket: S3 bucket to upload Sentinel-1 static topographic correction files to - use_static_files: Use pre-generated static topographic correction files if available + use_static_files: Use pre-generated static topographic correction files if available (Sentinel-1 only). + regenerate_static_files: Force the creation of, and upload of, new static files (Sentinel-1 only). frame_id: OPERA frame ID to record in the img_pair_info variable in the autoRIFT product file Returns: @@ -335,14 +380,27 @@ def process( from hyp3_autorift.s1_isce3 import process_sentinel1_burst_isce3 netcdf_file = process_sentinel1_burst_isce3( - reference, secondary, publish_bucket, use_static_files, frame_id, chip_size=chip_size + reference, + secondary, + publish_bucket, + use_static_files, + frame_id, + regenerate_static_files=regenerate_static_files, + chip_size=chip_size, + search_range=search_range, ) elif platform == 'S1-SLC': from hyp3_autorift.s1_isce3 import process_sentinel1_slc_isce3 netcdf_file = process_sentinel1_slc_isce3( - reference[0], secondary[0], publish_bucket, use_static_files, chip_size=chip_size + reference[0], + secondary[0], + publish_bucket, + use_static_files, + regenerate_static_files=regenerate_static_files, + chip_size=chip_size, + search_range=search_range, ) elif platform == 'NISAR': @@ -581,6 +639,12 @@ def main(): default=True, help='Use static topographic correction files for ISCE3 processing if available (Sentinel-1 only).', ) + parser.add_argument( + '--regenerate-static-files', + type=string_is_true, + default=False, + help='Force the creation of, and upload of, new static files (Sentinel-1 only).', + ) parser.add_argument( '--frame-id', type=utils.nullable_string, @@ -588,6 +652,7 @@ def main(): help='Optional OPERA frame ID to include in metadata 
for Sentinel-1 multi-burst processing, ' 'and will be ignored otherwise.', ) + args = parser.parse_args() logging.basicConfig( @@ -649,6 +714,7 @@ def main(): naming_scheme=args.naming_scheme, publish_bucket=args.publish_bucket, use_static_files=args.use_static_files, + regenerate_static_files=args.regenerate_static_files, frame_id=args.frame_id, ) diff --git a/src/hyp3_autorift/s1_isce3.py b/src/hyp3_autorift/s1_isce3.py index 9d550b82..d0575705 100644 --- a/src/hyp3_autorift/s1_isce3.py +++ b/src/hyp3_autorift/s1_isce3.py @@ -41,6 +41,7 @@ def process_sentinel1_burst_isce3( static_files_bucket, use_static_files, frame_id, + regenerate_static_files: bool = False, chip_size: int | None = None, search_range: int | None = None, ): @@ -67,7 +68,9 @@ def process_sentinel1_burst_isce3( use_static_files, frame_id, swaths, + regenerate_static_files, chip_size, + search_range, ) reference = reference[0] @@ -86,7 +89,9 @@ def process_sentinel1_burst_isce3( burst_id_sec, static_files_bucket, use_static_files, + regenerate_static_files, chip_size, + search_range, ) @@ -100,6 +105,7 @@ def process_burst( burst_id_sec, static_files_bucket, use_static_files, + regenerate_static_files: bool = False, chip_size: int | None = None, search_range: int | None = None, ): @@ -113,7 +119,10 @@ def process_burst( bounds=[lon_limits[0], lat_limits[0], lon_limits[1], lat_limits[1]], ) - if use_static_files: + if regenerate_static_files: + has_static_layer = False + do_static_upload = True + elif use_static_files: retrieval_bucket = static_files_bucket if static_files_bucket else S3_BUCKET has_static_layer = get_static_layer(burst_id_ref, retrieval_bucket) do_static_upload = not has_static_layer and static_files_bucket @@ -209,6 +218,7 @@ def process_sentinel1_slc_isce3( slc_sec, static_files_bucket, use_static_files, + regenerate_static_files: bool = False, chip_size: int | None = None, search_range: int | None = None, ): @@ -233,6 +243,7 @@ def process_sentinel1_slc_isce3( frame_id='N/A', 
chip_size=chip_size, search_range=search_range, + regenerate_static_files=regenerate_static_files, ) @@ -247,6 +258,7 @@ def process_slc( use_static_files, frame_id, swaths=(1, 2, 3), + regenerate_static_files: bool = False, chip_size: int | None = None, search_range: int | None = None, ): @@ -265,7 +277,10 @@ def process_slc( swath = int(burst_id.split('_')[-1][-1]) burst = s1reader.load_bursts(safe_ref, orbit_ref, swath, pol, burst_ids=[burst_id])[0] - if use_static_files: + if regenerate_static_files: + has_static_layer = False + do_static_upload = True + elif use_static_files: retrieval_bucket = static_files_bucket if static_files_bucket else S3_BUCKET has_static_layer = get_static_layer(burst_id, retrieval_bucket) do_static_upload = not has_static_layer and static_files_bucket