Generate an ATOM entry for the catalog and perform its ingestion

In this scenario, we generate an ATOM entry containing the metadata of a product and feed it into the catalog (we use a sample product downloaded from a public repository).

1. Set the necessary variables

The following section defines all the necessary information as variables so the code below can be easily reused.

[ ]:
import getpass

# Credentials: Ellip username and API key (the key is prompted for via getpass)
username = raw_input("What is your Ellip username? ")
api_key = getpass.getpass("What is your Ellip API key? ")

# Destination index on the Terradue catalog; an empty answer falls back to
# the index named after the user
index_prompt = "What is the destination index name? (press Enter to confirm default [{0}]) ".format(username)
index_name = raw_input(index_prompt) or username

# Catalog endpoint URL of the destination index
endpoint = "https://catalog.terradue.com/{0}".format(index_name)

2. Define a function to generate an EarthObservation extension element

[ ]:
import lxml.etree as etree
import numpy as np
from shapely.wkt import loads

def eop_metadata(metadata):
    """Build an EarthObservation extension element from a metadata dictionary.

    The dictionary must provide the keys 'startdate', 'enddate',
    'orbitNumber', 'wrsLongitudeGrid', 'orbitDirection', 'wkt', 'cc',
    'identifier', 'productType' and 'MY_ATTRIBUTE'. The 'wkt' value is parsed
    as a polygon whose exterior ring becomes the footprint; every other value
    is copied verbatim into the text of its element.

    Returns the root ``EarthObservation`` element (an lxml element).
    """

    # Namespace URIs used by the elements below
    ns_opt = 'http://www.opengis.net/opt/2.1'
    ns_om = 'http://www.opengis.net/om/2.0'
    ns_gml = 'http://www.opengis.net/gml/3.2'
    ns_eop = 'http://www.opengis.net/eop/2.1'

    def child(parent, namespace, local_name, text=None):
        # Append a namespace-qualified child; 'text' is given only for the
        # elements that hold an actual value (most elements are containers).
        node = etree.SubElement(parent, '{%s}%s' % (namespace, local_name))
        if text is not None:
            node.text = text
        return node

    root = etree.Element('{%s}EarthObservation' % ns_opt)

    # Acquisition time span
    period = child(child(root, ns_om, 'phenomenonTime'), ns_gml, 'TimePeriod')
    child(period, ns_gml, 'beginPosition', metadata['startdate'])
    child(period, ns_gml, 'endPosition', metadata['enddate'])

    # Orbit information
    equipment = child(child(root, ns_om, 'procedure'), ns_eop, 'EarthObservationEquipment')
    acquisition = child(equipment, ns_eop, 'acquisitionParameters')
    child(acquisition, ns_eop, 'orbitNumber', metadata['orbitNumber'])
    child(acquisition, ns_eop, 'wrsLongitudeGrid', metadata['wrsLongitudeGrid'])
    child(acquisition, ns_eop, 'orbitDirection', metadata['orbitDirection'])

    # Footprint: a chain of single-child containers ending in the posList
    node = child(root, ns_om, 'featureOfInterest')
    for namespace, local_name in ((ns_eop, 'Footprint'),
                                  (ns_eop, 'multiExtentOf'),
                                  (ns_gml, 'MultiSurface'),
                                  (ns_gml, 'surfaceMembers'),
                                  (ns_gml, 'Polygon'),
                                  (ns_gml, 'exterior'),
                                  (ns_gml, 'LinearRing')):
        node = child(node, namespace, local_name)
    pos_list_element = child(node, ns_gml, 'posList')

    # Each coordinate tuple of the exterior ring is reversed before being
    # written (for the usual "x y" WKT order this yields "y x" pairs)
    ring = loads(metadata['wkt']).exterior.coords
    positions = np.asarray([point[::-1] for point in ring]).tolist()
    pos_list_element.attrib['count'] = str(len(positions))
    # Every position, including the last one, is followed by a single space
    pos_list_element.text = ''.join(
        ' '.join(str(value) for value in position) + ' ' for position in positions)

    # Cloud cover
    observation_result = child(child(root, ns_om, 'result'), ns_opt, 'EarthObservationResult')
    child(observation_result, ns_opt, 'cloudCoverPercentage', metadata['cc'])

    # Identification and vendor-specific attribute
    eo_metadata = child(child(root, ns_eop, 'metaDataProperty'), ns_eop, 'EarthObservationMetaData')
    child(eo_metadata, ns_eop, 'identifier', metadata['identifier'])
    child(eo_metadata, ns_eop, 'productType', metadata['productType'])
    specific = child(child(eo_metadata, ns_eop, 'vendorSpecific'), ns_eop, 'SpecificInformation')
    child(specific, ns_eop, 'localAttribute', 'MY_ATTRIBUTE')
    child(specific, ns_eop, 'localValue', metadata['MY_ATTRIBUTE'])

    return root

3. Define a class for ATOM manipulation

This class allows us to set basic elements such as an identifier, a title, an enclosure link and a product date, and to append the EarthObservation extension produced by the function defined above.

[ ]:
import lxml.etree as etree
import sys
import os
import string
import hashlib
import urllib2
import base64
import time

class Atom(object):
    """Helper for reading and editing the first entry of an Atom feed.

    An instance wraps an lxml element (or element tree) whose root is an
    ``<feed>`` element. All entry-level accessors operate on the elements
    matched by ``/a:feed/a:entry/...``; newly created elements are appended
    to the first entry found at construction time (``self.entry``).
    """

    # Class-level defaults; __init__ overwrites them on every instance
    tree = None
    root = None
    entry = None

    # Prefix -> namespace URI map shared by all XPath queries of this class
    _NS = {
        'a': 'http://www.w3.org/2005/Atom',
        'b': 'http://www.opengis.net/owc/1.0',
        'c': 'http://purl.org/dc/terms/',
        'd': 'http://purl.org/dc/elements/1.1/',
        'os': 'http://a9.com/-/spec/opensearch/1.1/',
    }

    def __init__(self, root):
        """Wrap ``root`` (an lxml element or element tree holding a feed)."""
        self.root = root
        self.tree = root
        # Snapshot of the entry links present at construction time
        self.links = self._xpath('/a:feed/a:entry/a:link')
        entries = self._xpath('/a:feed/a:entry')
        self.entry = entries[0] if entries else None

    def _xpath(self, path, **variables):
        """Evaluate ``path`` on the document with the shared namespace map.

        Caller-supplied values are passed as XPath variables (``$name``)
        instead of being formatted into the expression, so values containing
        quotes cannot break or alter the query.
        """
        return self.root.xpath(path, namespaces=self._NS, **variables)

    def _first(self, path, **variables):
        """Return the first node matched by ``path``, or None if none match."""
        matches = self._xpath(path, **variables)
        return matches[0] if matches else None

    def _get_or_create(self, path, tag, create):
        """Return the first match of ``path``; optionally create it.

        When nothing matches and ``create`` is true, a new element named
        ``tag`` is appended to the first entry and returned; otherwise None
        is returned.
        """
        element = self._first(path)
        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether it exists.
        if element is None and create:
            element = etree.SubElement(self.entry, tag)
        return element

    @staticmethod
    def from_template():
        """Return an Atom instance built from a minimal single-entry feed."""
        template = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <entry>
    <title type="text"></title>
    <summary type="html"></summary>
    <link rel="enclosure" type="application/octet-stream" href=""/>
    <date xmlns="http://purl.org/dc/elements/1.1/"></date>
    <published></published>
    <identifier xmlns="http://purl.org/dc/elements/1.1/"></identifier>
  </entry>
</feed>"""
        tree = etree.fromstring(template)
        return Atom(tree)

    @staticmethod
    def load(url, username=None, api_key=None):
        """Load and return the atom file at the location url

        When ``username`` is given, the request carries an HTTP basic
        authorization header built from ``username`` and ``api_key``.

        Raises ValueError if the root element of the document is not an
        Atom ``feed`` element.
        """
        request = urllib2.Request(url)

        if username is not None:
            base64string = base64.b64encode('%s:%s' % (username, api_key))
            request.add_header("Authorization", "Basic %s" % base64string)

        fp = urllib2.urlopen(request)
        try:
            tree = etree.parse(fp)
        finally:
            # Close the response even when the document cannot be parsed
            fp.close()

        if tree.getroot().tag != "{http://www.w3.org/2005/Atom}feed":
            raise ValueError('not an Atom feed')

        return Atom(tree)

    def set_identifier(self, identifier):
        """Set first atom entry identifier (created if it does not exist)"""
        el_identifier = self._get_or_create(
            '/a:feed/a:entry/d:identifier',
            '{http://purl.org/dc/elements/1.1/}identifier', True)
        el_identifier.text = identifier

    def get_identifier(self):
        """Return the text of the first entry identifier, or None if absent."""
        el_identifier = self._first('/a:feed/a:entry/d:identifier')
        if el_identifier is None:
            return None
        return el_identifier.text

    def get_total_results(self, create=False):
        """Return the feed's OpenSearch totalResults as an int, or None.

        ``create`` is accepted for signature compatibility and is not used.
        """
        total_results = self._first('/a:feed/os:totalResults')
        if total_results is None:
            return None
        return int(total_results.text)

    def get_title(self, create=False):
        """Return the entry title element, creating it if ``create`` is true.

        Returns None when there is no title and ``create`` is false.
        """
        return self._get_or_create('/a:feed/a:entry/a:title',
                                   '{http://www.w3.org/2005/Atom}title', create)

    def set_title_text(self, text):
        """Set first atom entry title (created if it does not exist)"""
        self.get_title(True).text = text

    def get_summary(self, create=False):
        """Return the entry summary element, creating it if ``create`` is true.

        Returns None when there is no summary and ``create`` is false.
        """
        return self._get_or_create('/a:feed/a:entry/a:summary',
                                   '{http://www.w3.org/2005/Atom}summary', create)

    def set_summary_text(self, text):
        """Set the entry summary text (the element is created if missing)."""
        self.get_summary(True).text = text

    def get_links(self, rel_type):
        """Return the list of entry links whose ``rel`` equals ``rel_type``."""
        return self._xpath('/a:feed/a:entry/a:link[@rel = $rel]', rel=rel_type)

    def set_enclosure_link(self, href, title):
        """Point an enclosure link at ``href``.

        The first enclosure link whose href is empty or already equal to
        ``href`` is reused; if there is none, a new enclosure link is added.
        ``title`` is applied in both cases unless it is None.
        """
        link = self._first(
            '/a:feed/a:entry/a:link[@rel="enclosure" and (@href="" or @href=$href)]',
            href=href)

        if link is None:
            self.add_enclosure_link(href, title)
            return

        link.attrib['href'] = href
        if title is not None:
            link.attrib['title'] = title

    def add_enclosure_link(self, href, title):
        """Append a new enclosure link to the first entry and return it.

        The element is created in the Atom namespace (so it is found again
        by ``get_links("enclosure")``) and the attribute values are set
        through the API, which takes care of XML escaping.
        """
        link = etree.SubElement(self.entry, '{http://www.w3.org/2005/Atom}link')
        link.attrib['rel'] = 'enclosure'
        link.attrib['type'] = 'application/octet-stream'
        if title is not None:
            link.attrib['title'] = title
        link.attrib['href'] = href

        return link

    def add_extension(self, xml_ext):
        """Insert ``xml_ext`` right after the first entry link.

        If the entry has no link, the extension is appended to the entry.
        """
        first_link = self._first('/a:feed/a:entry/a:link')
        if first_link is None:
            self.entry.append(xml_ext)
        else:
            first_link.addnext(xml_ext)

    def add_link(self, href, rel, title=None, type=None):
        """Append a link with the given attributes to the first entry.

        ``title`` and ``type`` are only written when they are truthy.
        """
        link = etree.SubElement(self._xpath('/a:feed/a:entry')[0],
                                '{http://www.w3.org/2005/Atom}link')

        link.attrib['href'] = href
        link.attrib['rel'] = rel
        if title:
            link.attrib['title'] = title
        if type:
            link.attrib['type'] = type

    def remove_link(self, rel, link_title=None, link_type=None, link_url=None):
        """Remove the entry links with relation ``rel`` matching one filter.

        Exactly one filter is applied, chosen in this order of precedence:
        ``link_title`` (attribute ``title``), ``link_type`` (attribute
        ``type``), ``link_url`` (attribute ``href``). Links that do not carry
        the filtered attribute are left untouched.

        Raises ValueError if none of the three filters is given.
        """
        if link_title:
            attribute, value = 'title', link_title
        elif link_type:
            attribute, value = 'type', link_type
        elif link_url:
            # The URL of an Atom link is stored in its 'href' attribute
            attribute, value = 'href', link_url
        else:
            raise ValueError("Required parameter link_title, link_type or link_url")

        for link in self.get_links(rel):
            if link.attrib.get(attribute) == value:
                link.getparent().remove(link)

    def get_offering_elements(self, offering_code):
        """Return the entry offering elements whose code is ``offering_code``."""
        return self._xpath('/a:feed/a:entry/b:offering[@code=$code]',
                           code=offering_code)

    @staticmethod
    def get_operation_elements(offering_element, operation_code=None):
        """Return the operation children of ``offering_element``.

        When ``operation_code`` is truthy, only operations with that code
        are returned.
        """
        namespaces = {'b': 'http://www.opengis.net/owc/1.0'}
        if operation_code:
            return offering_element.xpath('b:operation[@code=$code]',
                                          namespaces=namespaces,
                                          code=operation_code)
        return offering_element.xpath('b:operation', namespaces=namespaces)

    def add_offering(self, offering):
        """Append the ``offering`` element to the first entry."""
        self._xpath('/a:feed/a:entry')[0].append(offering)

    def add_offerings(self, offerings):
        """Append every element of ``offerings`` to the first entry."""
        for offering in offerings:
            self.add_offering(offering)

    def get_dctspatial(self, create=False):
        """Return the entry dct:spatial element, creating it if requested.

        Returns None when the element is missing and ``create`` is false.
        """
        return self._get_or_create('/a:feed/a:entry/c:spatial',
                                   '{http://purl.org/dc/terms/}spatial', create)

    def set_dctspatial(self, wkt):
        """Set the text of the entry dct:spatial element to ``wkt``."""
        self.get_dctspatial(True).text = wkt

    def get_dcdate(self, create=False):
        """Return the entry dc:date element, creating it if requested.

        Returns None when the element is missing and ``create`` is false.
        """
        return self._get_or_create('/a:feed/a:entry/d:date',
                                   '{http://purl.org/dc/elements/1.1/}date', create)

    def set_dcdate(self, date):
        """Set the text of the entry dc:date element to ``date``."""
        self.get_dcdate(True).text = date

    def set_published(self, published):
        """Set the entry published text (the element is created if missing)."""
        el_published = self._get_or_create(
            '/a:feed/a:entry/a:published',
            '{http://www.w3.org/2005/Atom}published', True)
        el_published.text = published

    def get_category_by_scheme(self, scheme):
        """Return the first entry category with the given scheme, or None."""
        return self._first('/a:feed/a:entry/a:category[@scheme=$scheme]',
                           scheme=scheme)

    def get_categories(self, term, scheme=None):
        """Return the entry categories with the given term.

        When ``scheme`` is not None the categories must also carry that
        scheme.
        """
        if scheme is None:
            return self._xpath('/a:feed/a:entry/a:category[@term=$term]',
                               term=term)
        return self._xpath(
            '/a:feed/a:entry/a:category[@term=$term and @scheme=$scheme]',
            term=term, scheme=scheme)

    def remove_category(self, term, scheme=None):
        """Remove the entry categories selected by ``get_categories``."""
        for category in self.get_categories(term, scheme):
            category.getparent().remove(category)

    def remove_category_by_scheme(self, scheme):
        """Remove every entry category carrying the given scheme."""
        categories = self._xpath('/a:feed/a:entry/a:category[@scheme=$scheme]',
                                 scheme=scheme)
        for category in categories:
            category.getparent().remove(category)

    def set_category(self, term, label=None, scheme=None):
        """Create or update the category identified by ``term``/``scheme``.

        The first matching category is updated; if none matches, a new one is
        appended to the first entry. ``label`` and ``scheme`` are only written
        when they are not None.
        """
        matches = self.get_categories(term, scheme)
        if matches:
            category = matches[0]
        else:
            category = etree.SubElement(self.entry,
                                        '{http://www.w3.org/2005/Atom}category')

        category.attrib['term'] = term
        if label is not None:
            category.attrib['label'] = label
        if scheme is not None:
            category.attrib['scheme'] = scheme

    def set_generator(self, uri, version, text):
        """Create or update the generator element of the first entry."""
        el_generator = self._first('/a:feed/a:entry/a:generator')
        if el_generator is None:
            el_generator = etree.SubElement(
                self._xpath('/a:feed/a:entry')[0],
                '{http://www.w3.org/2005/Atom}generator')

        el_generator.attrib['uri'] = uri
        el_generator.attrib['version'] = version
        el_generator.text = text

    def append_summary_html(self, text):
        """Append atom summary with text

        ``text`` is wrapped in a ``<p>`` element and added after the current
        summary content; a missing or empty summary is treated as an empty
        string.
        """
        summary = self.get_summary(True)
        # An empty element has text None, which cannot be concatenated
        summary.text = (summary.text or "") + "<p>%s</p>" % text

    def to_string(self, pretty_print = True):
        """Return the serialized document, indented when ``pretty_print``."""
        return etree.tostring(self.tree, pretty_print=pretty_print)

    def clear_enclosures(self):
        """Remove every enclosure link from the entry."""
        for link in self.get_links("enclosure"):
            link.getparent().remove(link)

    def get_extensions(self, name, namespace):
        """Return the entry children named ``name`` in ``namespace``."""
        # An element name cannot be an XPath variable, so it is formatted in
        return self.root.xpath('/a:feed/a:entry/e:{0}'.format(name),
                               namespaces={'a': 'http://www.w3.org/2005/Atom',
                                           'e': namespace})

4. Build the EarthObservation extension element

We define a dictionary containing the metadata and pass it as the argument to the function defined in step 2.

[ ]:
# Sample product metadata; all values are given as strings
metadata = dict(
    startdate='2019-01-02T03:04:05.678Z',
    enddate='2019-01-02T03:05:06.789Z',
    orbitNumber='99',
    wrsLongitudeGrid='123',
    orbitDirection='DESCENDING',
    wkt='POLYGON((10.1 10.2,20.3 10.4,20.5 20.6,10.7 20.8,10.1 10.2))',
    cc='55',
    identifier='MY_PRODUCT',
    productType='MY_TYPE',
    MY_ATTRIBUTE='MY_VALUE',
)

# Turn the dictionary into the EarthObservation extension element
eo = eop_metadata(metadata)

Show the EarthObservation element just created:

[ ]:
# Serialize the EarthObservation element with indentation and display it
serialized_eo = etree.tostring(eo, pretty_print=True)
print(serialized_eo)

5. Build an ATOM feed

We create an ATOM feed with one entry to which we append the extension created above.

[ ]:
import datetime

# Start from the single-entry feed template and fill in the basic elements
atom = Atom.from_template()
atom.set_identifier(metadata['identifier'])
atom.set_title_text("Title for MY_PRODUCT")
atom.set_summary_text("This is the summary for MY_PRODUCT")
atom.set_dcdate("{0}/{1}".format(metadata['startdate'], metadata['enddate']))

# The trailing "Z" designates UTC, so the timestamp has to come from the UTC
# clock; the local time returned by now() would be mislabelled
atom.set_published("{0}Z".format(datetime.datetime.utcnow().isoformat()))

# Append the EarthObservation extension built in the previous step
atom.add_extension(eo)

# Location from which the product can be downloaded
atom.set_enclosure_link("https://store.terradue.com/myindex/MY_PRODUCT.tif", "Location on storage")

Show the resulting ATOM feed:

[ ]:
# Display the indented serialization of the whole feed
print(atom.to_string())

6. Post the ATOM feed

We post the ATOM feed to an index on the catalog (variables are defined in the first step).

[ ]:
import requests

# POST the serialized feed to the catalog index, authenticating with the
# credentials entered in the first step
response = requests.post(
    endpoint,
    data=atom.to_string(),
    auth=(username, api_key),
    headers={"Content-Type": "application/atom+xml", "Accept": "application/xml"},
)

# Only status 200 is reported as a successful update
# NOTE(review): the printed URL embeds the API key, so it ends up in the saved
# notebook output - clear the output before sharing the notebook
if response.status_code == 200:
    report = 'Data item updated at {0}/search?uid={1}&apikey={2} ({3})'
else:
    report = 'Data item NOT updated at {0}/search?uid={1}&apikey={2} ({3})'

print(report.format(endpoint, atom.get_identifier(), api_key, str(response.status_code)))

If a product URL with status 200 is displayed, the ATOM feed has been successfully uploaded and the product information is available on the Terradue catalog.

END