ee_image

export_image(image_or_asset_id, geometry, transform, crs, bands_gee, dtype_dst=None, pad_add=(0, 0), crs_polygon='EPSG:4326', resolution_dst=None)

Exports an image from Google Earth Engine (GEE) as a GeoTensor. It uses ee.data.getPixels (for asset ids) or ee.data.computePixels (for ee.Image objects) to export the image. If the request exceeds the GEE request-size limit, the geometry is split, the pieces are exported in parallel and mosaicked into a single GeoTensor.

Parameters:

    image_or_asset_id (Union[str, ee.Image], required): Name of the asset or ee.Image object.
    geometry (Union[Polygon, MultiPolygon], required): Geometry to export.
    transform (Affine, required): Geotransform of the grid in which the image is read.
    crs (str, required): CRS of the transform (i.e. of the grid in which the image is read).
    bands_gee (List[str], required): List of bands to export.
    dtype_dst (Optional[str], default None): dtype to cast the output GeoTensor to.
    pad_add (Tuple[int, int], default (0, 0)): Padding in pixels added to the window that is read. Useful when this function is called for interpolation/CNN prediction.
    crs_polygon (str, default 'EPSG:4326'): CRS of the geometry.
    resolution_dst (Optional[Union[float, Tuple[float, float]]], default None): Output resolution; if provided, the window transform is rescaled to this resolution.

Returns:

    GeoTensor: GeoTensor object.
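
A minimal usage sketch is shown below. The asset id, AOI, geotransform and CRS values are illustrative placeholders (not taken from the library's documentation), and Earth Engine credentials are assumed to be already configured.

```python
import ee
from affine import Affine
from shapely.geometry import box
from georeader.readers.ee_image import export_image

ee.Initialize()  # assumes Earth Engine credentials are already configured

# Illustrative AOI in EPSG:4326 and a 10 m UTM grid that roughly covers it
aoi = box(-5.90, 37.30, -5.85, 37.35)
transform = Affine(10, 0, 243_000, 0, -10, 4_135_000)  # illustrative geotransform

data = export_image(
    "COPERNICUS/S2_HARMONIZED/20220315T110651_20220315T111027_T30STG",  # hypothetical asset id
    geometry=aoi,
    transform=transform,
    crs="EPSG:32630",             # CRS of the transform (the read grid)
    bands_gee=["B4", "B3", "B2"],
    dtype_dst="uint16",
    crs_polygon="EPSG:4326",      # CRS of `aoi`
)
print(data.shape)
```

If the request exceeds the GEE size limit, export_image splits the geometry, downloads the pieces in parallel and mosaics them, so no chunking is needed on the caller's side.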

Source code in georeader/readers/ee_image.py
def export_image(image_or_asset_id:Union[str, ee.Image], 
                 geometry:Union[Polygon, MultiPolygon],
                 transform:Affine, crs:str,
                 bands_gee:List[str], 
                 dtype_dst:Optional[str]=None,
                 pad_add:Tuple[int, int]=(0, 0),
                 crs_polygon:str="EPSG:4326",
                 resolution_dst: Optional[Union[float, Tuple[float, float]]]=None) -> GeoTensor:
    """
    Exports an image from the GEE as a GeoTensor.
    It uses `ee.data.getPixels` (for asset ids) or `ee.data.computePixels` (for `ee.Image` objects) to export the image.

    Args:
        image_or_asset_id (Union[str, ee.Image]): Name of the asset or ee.Image object.
        geometry (Union[Polygon, MultiPolygon]): geometry to export
        transform (Affine): geotransform of the grid in which the image is read
        crs (str): crs of the transform
        bands_gee (List[str]): List of bands to export
        dtype_dst (Optional[str], optional): dtype to cast the output GeoTensor to. Defaults to None.
        pad_add (Tuple[int, int], optional): pad in pixels to add to the resulting `window` that is read. This is useful when this function
            is called for interpolation/CNN prediction. Defaults to (0, 0).
        crs_polygon (str, optional): crs of the geometry. Defaults to "EPSG:4326".
        resolution_dst (Optional[Union[float, Tuple[float, float]]], optional): output resolution; if provided, the window transform
            is rescaled to this resolution. Defaults to None.

    Returns:
        GeoTensor: GeoTensor object
    """
    if isinstance(image_or_asset_id, str):
        method = ee.data.getPixels
        request_params = {"assetId": image_or_asset_id}
    elif isinstance(image_or_asset_id, ee.Image):
        method = ee.data.computePixels
        request_params = {"expression": image_or_asset_id}
        # TODO if crs and transform are not provided get it from the image?
    else:
        raise ValueError(f"image_or_asset_id must be a string or ee.Image object found {type(image_or_asset_id)}")

    geodata = FakeGeoData(crs=crs, transform=transform)

    # Pixel coordinates surrounding the geometry
    window_polygon = read.window_from_polygon(geodata, geometry, crs_polygon=crs_polygon,
                                              window_surrounding=True)
    if any(p > 0 for p in pad_add):
        window_polygon = window_utils.pad_window(window_polygon, pad_add)
    window_polygon = window_utils.round_outer_window(window_polygon)

    # Shift the window to the image coordinates
    transform_window = rasterio.windows.transform(window_polygon, geodata.transform)

    if resolution_dst is not None:
        transform_window = window_utils.transform_to_resolution_dst(transform_window, resolution_dst)

    request_params.update({
                    'fileFormat':"GEO_TIFF", 
                    'bandIds':  bands_gee,
                    'grid': {
                        'dimensions': {
                            'height': window_polygon.height, 
                            'width': window_polygon.width
                        },
                        'affineTransform': {
                            'scaleX': transform_window.a,
                            'shearX': transform_window.b,
                            'translateX': transform_window.c,
                            'shearY': transform_window.d,
                            'scaleY': transform_window.e,
                            'translateY': transform_window.f
                        },
                        'crsCode': geodata.crs
                    }
                    })

    try:
        data_raw = method(request_params)
        data = rasterio.open(BytesIO(data_raw))
        geotensor = GeoTensor(data.read(), transform=data.transform,
                             crs=data.crs, fill_value_default=data.nodata)
        if dtype_dst is not None:
            geotensor = geotensor.astype(dtype_dst)

    except ee.EEException as e:
        # Check if the exception starts with Total request size
        if str(e).startswith("Total request size"):
            # Split the geometry in two and call recursively
            pols_execute = []
            for sb in split_bounds(geometry.bounds):
                pol = box(*sb)
                if not geometry.intersects(pol):
                    continue
                pols_execute.append(pol.intersection(geometry))

            def process_bound(poly):
                gt = export_image(image_or_asset_id=image_or_asset_id, geometry=poly, 
                                  crs=crs, transform=transform, 
                                  bands_gee=bands_gee, dtype_dst=dtype_dst, 
                                  crs_polygon=crs_polygon,
                                  resolution_dst=resolution_dst)
                return gt

            with ThreadPoolExecutor() as executor:
                geotensors = list(executor.map(process_bound, pols_execute))

            # Remove None values from the list
            geotensors = [gt for gt in geotensors if gt is not None]

            dst_crs = geotensors[0].crs
            aoi_dst_crs = window_utils.polygon_to_crs(geometry, 
                                                      crs_polygon=crs_polygon, 
                                                      dst_crs=dst_crs)

            geotensor = mosaic.spatial_mosaic(geotensors, 
                                              dtype_dst=dtype_dst,
                                              polygon=aoi_dst_crs, 
                                              dst_crs=dst_crs)
        else:
            raise e
    return geotensor

export_cube(query, geometry, transform=None, crs=None, dtype_dst=None, bands_gee=None, crs_polygon='EPSG:4326', display_progress=True)

Download all images in the query that intersect the geometry.

Note: This function is intended for small areas. If the area is so large that several images per day intersect the geometry, the images will not be grouped by day.

Parameters:

    query (GeoDataFrame, required): Dataframe from georeader.readers.ee_query.query. Required columns: gee_id, collection_name, bands_gee.
    geometry (Union[Polygon, MultiPolygon], required): Geometry to export.
    transform (Optional[Affine], default None): Transform of the grid to read. If None, the transform of the first image in the query is used.
    crs (Optional[str], default None): CRS of the transform. If None, the crs of the first image in the query is used.
    dtype_dst (Optional[str], default None): dtype of the output GeoTensor.
    bands_gee (Optional[List[str]], default None): List of bands to export. If None, the bands_gee column of the query is used.
    crs_polygon (str, default 'EPSG:4326'): CRS of the geometry.
    display_progress (bool, default True): Display progress bar.

Returns:

    Optional[GeoTensor]: GeoTensor object with 4 dimensions: (time, band, y, x). None if the query is empty.
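
A hedged sketch combining query (documented below) with export_cube. The AOI, dates and band names are illustrative; bands_gee is passed explicitly because the query dataframe is not guaranteed to carry a bands_gee column.

```python
import ee
from datetime import datetime, timezone
from shapely.geometry import box
from georeader.readers.ee_query import query
from georeader.readers.ee_image import export_cube

ee.Initialize()  # assumes Earth Engine credentials are already configured

aoi = box(-5.90, 37.30, -5.85, 37.35)  # illustrative AOI in EPSG:4326
products = query(aoi,
                 datetime(2022, 3, 1, tzinfo=timezone.utc),
                 datetime(2022, 3, 31, tzinfo=timezone.utc),
                 producttype="S2")

# bands_gee passed explicitly (illustrative band selection)
cube = export_cube(products, geometry=aoi,
                   bands_gee=["B4", "B3", "B2"],
                   dtype_dst="uint16")
if cube is not None:
    print(cube.shape)  # (time, band, y, x)
```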

Source code in georeader/readers/ee_image.py
def export_cube(query:gpd.GeoDataFrame, geometry:Union[Polygon, MultiPolygon], 
                transform:Optional[Affine]=None, crs:Optional[str]=None,
                dtype_dst:Optional[str]=None,
                bands_gee:Optional[List[str]]=None,
                crs_polygon:str="EPSG:4326",
                display_progress:bool=True) -> Optional[GeoTensor]:
    """
    Download all images in the query that intersect the geometry.

    Note: This function is intended for small areas. If the area is so large that several images per day intersect the geometry, the images will not be grouped by day.

    Args:
        query (gpd.GeoDataFrame): dataframe from `georeader.readers.ee_query.query`. Required columns: gee_id, collection_name, bands_gee
        geometry (Union[Polygon, MultiPolygon]): geometry to export
        transform (Optional[Affine], optional): transform of the grid to read. If None it will use the transform of the first image in the query.
            Defaults to None.
        crs (Optional[str], optional): crs of the transform. If None it will use the crs of the first image in the query. Defaults to None.
        dtype_dst (Optional[str], optional): dtype of the output GeoTensor. Defaults to None.
        bands_gee (Optional[List[str]], optional): List of bands to export. If None it will use the bands_gee column in the query. Defaults to None.
        crs_polygon (str, optional): crs of the geometry. Defaults to "EPSG:4326".
        display_progress (bool, optional): Display progress bar. Defaults to True.

    Returns:
        Optional[GeoTensor]: GeoTensor object with 4 dimensions: (time, band, y, x). None if the query is empty.
    """

    # TODO group by solar_day and satellite??
    if query.shape[0] == 0:
        return None

    # Check required columns
    required_columns = ["gee_id", "collection_name"]
    if bands_gee is None:
        required_columns.append("bands_gee")
    if not all([col in query.columns for col in required_columns]):
        raise ValueError(f"Columns {required_columns} are required in the query dataframe")

    # Get the first image to get the crs and transform if not provided
    if crs is None:
        first_image = query.iloc[0]
        if "proj" not in first_image:
            raise ValueError("proj column is required in the query dataframe if crs is not provided")
        crs = first_image["proj"]["crs"]

    if transform is None:
        first_image = query.iloc[0]
        if "proj" not in first_image:
            raise ValueError("proj column is required in the query dataframe if transform is not provided")
        transform = Affine(*first_image["proj"]["transform"])


    # geotensor_list = []
    # for i, image in query.iterrows():
    #     asset_id = f'{image["collection_name"]}/{image["gee_id"]}'
    #     geotensor = export_image_getpixels(asset_id, geometry, proj, image["bands_gee"], crs_polygon=crs_polygon, dtype_dst=dtype_dst)
    #     geotensor_list.append(geotensor)

    def process_query_image(tuple_row):
        _, image = tuple_row
        asset_id = f'{image["collection_name"]}/{image["gee_id"]}'
        if bands_gee is None:
            bands_gee_iter = image["bands_gee"]
        else:
            bands_gee_iter = bands_gee
        geotensor = export_image(asset_id, geometry=geometry, crs=crs, transform=transform,
                                 bands_gee=bands_gee_iter,
                                 crs_polygon=crs_polygon, dtype_dst=dtype_dst)
        return geotensor

    with ThreadPoolExecutor() as executor:
        geotensor_list = list(tqdm(executor.map(process_query_image, query.iterrows()), 
                                   total=query.shape[0], disable=not display_progress))

    return concatenate(geotensor_list)

query(area, date_start, date_end, producttype='S2', filter_duplicates=True, return_collection=False, add_s2cloudless=False, extra_metadata_keys=None)

Query Landsat and Sentinel-2 products from the Google Earth Engine.

Parameters:

    area (Union[MultiPolygon, Polygon], required): Area to query images, in EPSG:4326.
    date_start (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    date_end (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    producttype (str, default 'S2'): One of 'S2', 'S2_SR', 'L8', 'L9', 'Landsat' (queries {'L8', 'L9'}) or 'both' (queries {'S2', 'L8', 'L9'}).
    filter_duplicates (bool, default True): Filter S2 images that are duplicated.
    return_collection (bool, default False): Also return the corresponding image collection.
    add_s2cloudless (bool, default False): Add a column that indicates whether the s2cloudless image is available (from the COPERNICUS/S2_CLOUD_PROBABILITY collection).
    extra_metadata_keys (Optional[List[str]], default None): List of extra metadata keys to add to the geodataframe.

Returns:

    Union[GeoDataFrame, Tuple[GeoDataFrame, ImageCollection]]: Geodataframe with available products in the given area and time range. If return_collection is True, it also returns the ee.ImageCollection of available images.
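
For example, a hedged sketch of a typical call (the AOI and dates are illustrative, and Earth Engine credentials are assumed to be configured):

```python
import ee
from datetime import datetime, timezone
from shapely.geometry import box
from georeader.readers.ee_query import query

ee.Initialize()  # assumes Earth Engine credentials are already configured

aoi = box(-5.90, 37.30, -5.85, 37.35)  # illustrative AOI in EPSG:4326
products, img_col = query(aoi,
                          datetime(2022, 3, 1, tzinfo=timezone.utc),
                          datetime(2022, 3, 31, tzinfo=timezone.utc),
                          producttype="both",
                          add_s2cloudless=True,
                          return_collection=True)

# Columns such as title, gee_id, collection_name, cloudcoverpercentage,
# solarday and satellite can be used to filter products or build asset ids
print(products[["title", "satellite", "cloudcoverpercentage"]].head())
```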

Source code in georeader/readers/ee_query.py
def query(area:Union[MultiPolygon,Polygon],
          date_start:datetime, date_end:datetime,
          producttype:str='S2', filter_duplicates:bool=True,
          return_collection:bool=False,
          add_s2cloudless:bool=False,
          extra_metadata_keys:Optional[List[str]]=None
          )-> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, ee.ImageCollection]]:
    """
    Query Landsat and Sentinel-2 products from the Google Earth Engine.

    Args:
        area: area to query images in EPSG:4326
        date_start: datetime in a given timezone. If tz not provided UTC will be assumed.
        date_end: datetime in a given timezone. If tz not provided UTC will be assumed.
        producttype: one of 'S2', 'S2_SR', 'L8', 'L9', 'Landsat' (-> {"L8", "L9"}) or 'both' (-> {"S2", "L8", "L9"})
        filter_duplicates: Filter S2 images that are duplicated
        return_collection: returns also the corresponding image collection
        add_s2cloudless: Adds a column that indicates if the s2cloudless image is available (from the
            COPERNICUS/S2_CLOUD_PROBABILITY collection)
        extra_metadata_keys: list of extra metadata keys to add to the geodataframe.

    Returns:
        geodataframe with available products in the given area and time range
        if `return_collection` is True it also returns the `ee.ImageCollection` of available images
    """

    pol = ee.Geometry(mapping(area))

    if date_start.tzinfo is not None:
        tz = date_start.tzinfo
        if isinstance(tz, ZoneInfo):
            tz = tz.key

        date_start = date_start.astimezone(timezone.utc)
        date_end = date_end.astimezone(timezone.utc)
    else:
        tz = timezone.utc

    assert date_end >= date_start, f"Date end: {date_end} prior to date start: {date_start}"

    if producttype == "S2_SR":
        image_collection_name = "COPERNICUS/S2_SR_HARMONIZED"
        keys_query = {"PRODUCT_ID": "title", 'CLOUDY_PIXEL_PERCENTAGE': "cloudcoverpercentage"}
    elif producttype == "S2":
        image_collection_name = "COPERNICUS/S2_HARMONIZED"
        keys_query = {"PRODUCT_ID": "title", 'CLOUDY_PIXEL_PERCENTAGE': "cloudcoverpercentage"}
    elif producttype == "Landsat":
        image_collection_name = "LANDSAT/LC08/C02/T1_RT_TOA"
        keys_query = {"LANDSAT_PRODUCT_ID": "title", 'CLOUD_COVER': "cloudcoverpercentage"}
    elif producttype == "L8":
        image_collection_name = "LANDSAT/LC08/C02/T1_RT_TOA"
        keys_query = {"LANDSAT_PRODUCT_ID": "title", 'CLOUD_COVER': "cloudcoverpercentage"}
    elif producttype == "L9":
        image_collection_name = "LANDSAT/LC09/C02/T1_TOA"
        keys_query = {"LANDSAT_PRODUCT_ID": "title", 'CLOUD_COVER': "cloudcoverpercentage"}
    elif producttype == "both":
        image_collection_name = "LANDSAT/LC08/C02/T1_RT_TOA"
        keys_query = {"LANDSAT_PRODUCT_ID": "title", 'CLOUD_COVER': "cloudcoverpercentage"}
    else:
        raise NotImplementedError(f"Unknown product type {producttype}")

    img_col = ee.ImageCollection(image_collection_name).filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
    if "T1" in image_collection_name:
        # Add tier 2 data to the query
        image_collection_name_t2 = image_collection_name.replace("T1_RT", "T2").replace("T1", "T2")
        img_col_t2 = ee.ImageCollection(image_collection_name_t2).filterDate(date_start.replace(tzinfo=None),
                                                                     date_end.replace(tzinfo=None)).filterBounds(
            pol)
        img_col = img_col.merge(img_col_t2)

    if (producttype == "Landsat") or (producttype == "both"):
        img_col_l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
        img_col = img_col.merge(img_col_l9)
        img_col_l9_t2 = ee.ImageCollection("LANDSAT/LC09/C02/T2_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
        img_col = img_col.merge(img_col_l9_t2)

    if extra_metadata_keys is None:
        extra_metadata_keys = []

    geodf = img_collection_to_feature_collection(img_col,
                                                 ["system:time_start"] + list(keys_query.keys()) + extra_metadata_keys,
                                                as_geopandas=True, band_crs="B2")


    geodf.rename(keys_query, axis=1, inplace=True)

    # Filter TIRS-only images (title starts with LT08)
    if geodf.shape[0] > 0:
        tile_starts_with_lt08 = geodf["title"].str.startswith("LT08")
        if tile_starts_with_lt08.any():
            warnings.warn(f"Found {tile_starts_with_lt08.sum()} images of Landsat-8 TIRS only. Removing them.")
            geodf = geodf[~tile_starts_with_lt08].copy()
        if (producttype == "Landsat") or (producttype == "both") or (producttype == "L8") or (producttype == "L9"):
            geodf["collection_name"] = geodf["title"].apply(figure_out_collection_landsat)
        else:
            geodf["collection_name"] = image_collection_name

    img_col = img_col.map(lambda x: _rename_add_properties(x, keys_query))

    if producttype == "both":
        img_col_s2 = ee.ImageCollection("COPERNICUS/S2_HARMONIZED").filterDate(date_start.replace(tzinfo=None),
                                                                              date_end.replace(
                                                                                  tzinfo=None)).filterBounds(
            pol)
        keys_query_s2 = {"PRODUCT_ID": "title", 'CLOUDY_PIXEL_PERCENTAGE': "cloudcoverpercentage"}
        geodf_s2 = img_collection_to_feature_collection(img_col_s2,
                                                        ["system:time_start"] + list(keys_query_s2.keys()) + extra_metadata_keys,
                                                        as_geopandas=True, band_crs="B2")
        geodf_s2["collection_name"] = "COPERNICUS/S2_HARMONIZED"
        geodf_s2.rename(keys_query_s2, axis=1, inplace=True)
        if geodf_s2.shape[0] > 0:
            if geodf.shape[0] == 0:
                geodf = geodf_s2
            else:
                geodf = pd.concat([geodf_s2, geodf], ignore_index=True)

            img_col_s2 = img_col_s2.map(lambda x: _rename_add_properties(x, keys_query_s2))
            img_col = img_col.merge(img_col_s2)

    if geodf.shape[0] == 0:
        warnings.warn(f"Not images found of collection {producttype} between dates {date_start} and {date_end}")
        if return_collection:
            return geodf, img_col
        return geodf

    if add_s2cloudless and producttype in ["both", "S2"]:
        values_s2_idx = geodf.title.apply(lambda x: x.startswith("S2"))
        indexes_s2 = geodf.gee_id[values_s2_idx].tolist()
        img_col_cloudprob = ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY").filterDate(date_start.replace(tzinfo=None),
                                                                              date_end.replace(
                                                                                  tzinfo=None)).filterBounds(
            pol)
        img_col_cloudprob = img_col_cloudprob.filter(ee.Filter.inList("system:index", ee.List(indexes_s2)))
        geodf_cloudprob = img_collection_to_feature_collection(img_col_cloudprob,
                                                               ["system:time_start"],
                                                               as_geopandas=True)
        geodf["s2cloudless"] = False
        list_geeid = geodf_cloudprob.gee_id.tolist()
        geodf.loc[values_s2_idx, "s2cloudless"] = geodf.loc[values_s2_idx, "gee_id"].apply(lambda x: x in list_geeid)


    geodf = _add_stuff(geodf, area, tz)

    # Fix Landsat ids: strip everything before the scene id, e.g.
    # projects/earthengine-public/assets/LANDSAT/LC09/C02/T1_TOA/1_2_LO09_037031_20220315 -> LO09_037031_20220315
    if geodf.satellite.str.startswith("LC0").any():
        geodf.loc[geodf.satellite.str.startswith("LC0"),"gee_id"] = geodf.loc[geodf.satellite.str.startswith("LC0"),"gee_id"].apply(lambda x: "LC0"+x.split("LC0")[1])

    # Same fix for ids whose satellite name starts with LO0
    if geodf.satellite.str.startswith("LO0").any():
        geodf.loc[geodf.satellite.str.startswith("LO0"),"gee_id"] = geodf.loc[geodf.satellite.str.startswith("LO0"),"gee_id"].apply(lambda x: "LO0"+x.split("LO0")[1])


    if filter_duplicates:
        # TODO filter prioritizing s2cloudless?
        geodf = query_utils.filter_products_overlap(area, geodf,
                                                    groupkey=["solarday", "satellite"]).copy()
        # filter img_col:
        img_col = img_col.filter(ee.Filter.inList("title", ee.List(geodf.index.tolist())))

    geodf = geodf.sort_values("utcdatetime")
    img_col = img_col.sort("system:time_start")

    if return_collection:
        return geodf, img_col

    return geodf

query_s1(area, date_start, date_end, filter_duplicates=True, return_collection=False)

Query S1 products from the Google Earth Engine

Parameters:

    area (Union[MultiPolygon, Polygon], required): Area to query images, in EPSG:4326.
    date_start (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    date_end (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    filter_duplicates (bool, default True): Filter S1 images that are duplicated.
    return_collection (bool, default False): Also return the corresponding image collection.

Returns:

    Union[GeoDataFrame, Tuple[GeoDataFrame, ImageCollection]]: Geodataframe with available S1 GRD products in the given area and time range. If return_collection is True, it also returns the ee.ImageCollection of available images.
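
A hedged usage sketch, analogous to query above (the AOI and dates are illustrative):

```python
import ee
from datetime import datetime, timezone
from shapely.geometry import box
from georeader.readers.ee_query import query_s1

ee.Initialize()  # assumes Earth Engine credentials are already configured

aoi = box(-5.90, 37.30, -5.85, 37.35)  # illustrative AOI in EPSG:4326
s1_products = query_s1(aoi,
                       datetime(2022, 3, 1, tzinfo=timezone.utc),
                       datetime(2022, 3, 31, tzinfo=timezone.utc))

# orbitProperties_pass distinguishes ASCENDING from DESCENDING acquisitions
print(s1_products[["title", "orbitProperties_pass", "solarday"]].head())
```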

Source code in georeader/readers/ee_query.py
def query_s1(area:Union[MultiPolygon,Polygon],
             date_start:datetime, date_end:datetime,
             filter_duplicates:bool=True,
             return_collection:bool=False)-> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, ee.ImageCollection]]:
    """
    Query S1 products from the Google Earth Engine

    Args:
        area: area to query images in EPSG:4326
        date_start: datetime in a given timezone. If tz not provided UTC will be assumed.
        date_end: datetime in a given timezone. If tz not provided UTC will be assumed.
        filter_duplicates: Filter S1 images that are duplicated
        return_collection: returns also the corresponding image collection

    Returns:
        geodataframe with available S1 GRD products in the given area and time range
        if `return_collection` is True it also returns the `ee.ImageCollection` of available images
    """
    pol = ee.Geometry(mapping(area))

    if date_start.tzinfo is not None:
        tz = date_start.tzinfo
        if isinstance(tz, ZoneInfo):
            tz = tz.key

        date_start = date_start.astimezone(timezone.utc)
        date_end = date_end.astimezone(timezone.utc)
    else:
        tz = timezone.utc

    assert date_end >= date_start, f"Date end: {date_end} prior to date start: {date_start}"

    img_col = ee.ImageCollection('COPERNICUS/S1_GRD').\
       filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')).\
       filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH')).\
       filter(ee.Filter.eq('instrumentMode', 'IW')).\
       filterDate(date_start.replace(tzinfo=None),
                  date_end.replace(tzinfo=None)).\
       filterBounds(pol)

    keys_query = {"orbitProperties_pass": "orbitProperties_pass"}

    geodf = img_collection_to_feature_collection(img_col,
                                                 ["system:time_start"] + list(keys_query.keys()),
                                                 as_geopandas=True, band_crs="VV")
    geodf.rename(keys_query, axis=1, inplace=True)
    geodf["title"] = geodf["gee_id"]
    geodf["collection_name"] = "COPERNICUS/S1_GRD"
    geodf = _add_stuff(geodf, area, tz)

    if filter_duplicates:
        geodf = query_utils.filter_products_overlap(area, geodf,
                                                    groupkey=["solarday", "satellite","orbitProperties_pass"]).copy()
        # filter img_col:
        img_col = img_col.filter(ee.Filter.inList("system:index", ee.List(geodf.index.tolist())))

    geodf = geodf.sort_values("utcdatetime")
    img_col = img_col.sort("system:time_start")

    if return_collection:
        return geodf, img_col

    return geodf

query_landsat_457(area, date_start, date_end, producttype='all', filter_duplicates=True, return_collection=False, extra_metadata_keys=None)

Query Landsat-7, Landsat-5 or Landsat-4 products from the Google Earth Engine.

Parameters:

    area (Union[MultiPolygon, Polygon], required): Area to query images, in EPSG:4326.
    date_start (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    date_end (datetime, required): Datetime in a given timezone. If tz not provided, UTC is assumed.
    producttype (str, default 'all'): One of 'all' (queries {'L4', 'L5', 'L7'}), 'L4', 'L5' or 'L7'.
    filter_duplicates (bool, default True): Filter duplicate images over the same area.
    return_collection (bool, default False): Also return the corresponding image collection.
    extra_metadata_keys (Optional[List[str]], default None): Extra metadata keys to add to the geodataframe.

Returns:

    Union[GeoDataFrame, Tuple[GeoDataFrame, ImageCollection]]: Geodataframe with available products in the given area and time range. If return_collection is True, it also returns the ee.ImageCollection of available images.
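
A hedged usage sketch (the AOI and dates are illustrative, and Earth Engine credentials are assumed to be configured):

```python
import ee
from datetime import datetime, timezone
from shapely.geometry import box
from georeader.readers.ee_query import query_landsat_457

ee.Initialize()  # assumes Earth Engine credentials are already configured

aoi = box(-5.90, 37.30, -5.85, 37.35)  # illustrative AOI in EPSG:4326
l457 = query_landsat_457(aoi,
                         datetime(2005, 6, 1, tzinfo=timezone.utc),
                         datetime(2005, 8, 31, tzinfo=timezone.utc),
                         producttype="all")

print(l457[["title", "collection_name", "cloudcoverpercentage"]].head())
```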

Source code in georeader/readers/ee_query.py
def query_landsat_457(area:Union[MultiPolygon,Polygon],
                      date_start:datetime, date_end:datetime,
                      producttype:str="all",
                      filter_duplicates:bool=True,
                      return_collection:bool=False,
                      extra_metadata_keys:Optional[List[str]]=None
          )-> Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, ee.ImageCollection]]:
    """
    Query Landsat-7, Landsat-5 or Landsat-4 products from the Google Earth Engine.

    Args:
        area (Union[MultiPolygon,Polygon]): area to query images in EPSG:4326
        date_start (datetime): datetime in a given timezone. If tz not provided UTC will be assumed.
        date_end (datetime): datetime in a given timezone. If tz not provided UTC will be assumed.
        producttype (str, optional): 'all' -> {"L4", "L5", "L7"}, "L4", "L5" or "L7". Defaults to "all".
        filter_duplicates (bool, optional): filter duplicate images over the same area. Defaults to True.
        return_collection (bool, optional): returns also the corresponding image collection. Defaults to False.
        extra_metadata_keys (Optional[List[str]], optional): extra metadata keys to add to the geodataframe. Defaults to None.

    Returns:
        Union[gpd.GeoDataFrame, Tuple[gpd.GeoDataFrame, ee.ImageCollection]]: geodataframe with available products in the given area and time range
    """

    pol = ee.Geometry(mapping(area))

    if date_start.tzinfo is not None:
        tz = date_start.tzinfo
        if isinstance(tz, ZoneInfo):
            tz = tz.key

        date_start = date_start.astimezone(timezone.utc)
        date_end = date_end.astimezone(timezone.utc)
    else:
        tz = timezone.utc

    assert date_end >= date_start, f"Date end: {date_end} prior to date start: {date_start}"

    if extra_metadata_keys is None:
        extra_metadata_keys = []

    if producttype == "all" or producttype == "L5":
        image_collection_name = "LANDSAT/LT05/C02/T1_TOA"
    elif producttype == "L4":
        image_collection_name = "LANDSAT/LT04/C02/T1_TOA"
    elif producttype == "L7":
        image_collection_name = "LANDSAT/LE07/C02/T1_TOA"
    else:
        raise NotImplementedError(f"Unknown product type {producttype}")

    keys_query = {"LANDSAT_PRODUCT_ID": "title", 'CLOUD_COVER': "cloudcoverpercentage"}
    img_col = ee.ImageCollection(image_collection_name).filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
    # Merge T2 collection
    img_col_t2 = ee.ImageCollection(image_collection_name.replace("T1", "T2")).filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
    img_col = img_col.merge(img_col_t2)

    if producttype == "all":
        img_col_l4 = ee.ImageCollection("LANDSAT/LT04/C02/T1_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
        img_col_l4_t2 = ee.ImageCollection("LANDSAT/LT04/C02/T2_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                     date_end.replace(tzinfo=None)).filterBounds(
            pol)
        img_col_l4 = img_col_l4.merge(img_col_l4_t2)
        img_col = img_col.merge(img_col_l4)

        # Add L7 T1 and T2
        img_col_l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                   date_end.replace(tzinfo=None)).filterBounds(
        pol)
        img_col_l7_t2 = ee.ImageCollection("LANDSAT/LE07/C02/T2_TOA").filterDate(date_start.replace(tzinfo=None),
                                                                        date_end.replace(tzinfo=None)).filterBounds(
                pol)
        img_col_l7 = img_col_l7.merge(img_col_l7_t2)
        img_col = img_col.merge(img_col_l7)

    geodf = img_collection_to_feature_collection(img_col,
                                                 ["system:time_start"] + list(keys_query.keys()) + extra_metadata_keys,
                                                as_geopandas=True, band_crs="B2")

    geodf.rename(keys_query, axis=1, inplace=True)

    if geodf.shape[0] == 0:
        warnings.warn(f"Not images found of collection {producttype} between dates {date_start} and {date_end}")
        if return_collection:
            return geodf, img_col
        return geodf

    img_col = img_col.map(lambda x: _rename_add_properties(x, keys_query))
    geodf["collection_name"] = geodf.title.apply(lambda x: figure_out_collection_landsat(x))

    geodf = _add_stuff(geodf, area, tz)

    # Fix Landsat ids: strip the prefix added when merging collections (e.g. "1_2_")
    geodf["gee_id"] = geodf["gee_id"].apply(lambda x: "L"+x.split("L")[1])

    if filter_duplicates:
        geodf = query_utils.filter_products_overlap(area, geodf,
                                                    groupkey=["solarday", "satellite"]).copy()
        # filter img_col:
        img_col = img_col.filter(ee.Filter.inList("title", ee.List(geodf.index.tolist())))

    geodf = geodf.sort_values("utcdatetime")
    img_col = img_col.sort("system:time_start")

    if return_collection:
        return geodf, img_col

    return geodf