Sitemap Class

This class provides methods to read a website's sitemap from a local file or to fetch it from a remote location. Use the write method to save the sitemap to any file location.

Source code in pysitemaps/__init__.py
class Sitemap:
    """Sitemap: A class to represent a Sitemap.

    A Sitemap holds one parent XmlDocument plus a list of sub-sitemaps. It can
    be loaded from a local file with `read`, loaded from a remote location with
    `fetch`, and saved to disk with `write`.

    Attributes:
        website_name (str): Website name given at construction time.
        file_path (str): Path of the sitemap file given at construction time.
        xsl_file (str): Path of the xsl file associated with the sitemap.
        content (dict): Holds the "parent" XmlDocument and the list of
            "sub_sitemaps".
    """

    def __init__(
        self,
        website_name: str = None,
        file_path: str = "",
        xsl_file: str = "",
    ) -> None:
        """Initialize Sitemap object.

        Args:
            website_name (str, optional): Website name. Defaults to None, but a
                non-empty value is required.
            file_path (str, optional): Path of Sitemap.xml. Defaults to "".
            xsl_file (str, optional): Path of xsl_file. Defaults to "".

        Raises:
            ValueError: If website_name is None or empty.
        """
        # Raising a plain string is not valid in Python 3; raise a real
        # exception so callers see the intended message.
        if not website_name:
            raise ValueError(
                "Cannot Create Sitemap object. Please add sitename to the argument"
            )
        self.website_name = website_name

        self.xsl_file = xsl_file
        self.file_path = file_path

        self.content = {
            "parent": XmlDocument(file_path),
            "sub_sitemaps": [],
        }

    def read(self, file_path: str = "") -> None:
        """Read sitemap from a local file_path.

        If file_path is not specified, the file_path given when the Sitemap
        object was created is used. Paths that do not end with "xml" are
        ignored and leave the object unchanged.

        Args:
            file_path (str, optional): Sitemap file path. Defaults to "".
        """
        if not file_path:
            file_path = self.file_path

        if file_path.endswith("xml"):
            # The sitemap protocol requires UTF-8, so do not depend on the
            # platform's locale encoding.
            with open(file_path, "r", encoding="utf-8") as f:
                xml_as_text = f.read()
            self.xsl_file = extract_xsl_file(xml_as_text=xml_as_text)
            self.content["parent"] = XmlDocument(file_path)
            self.content["parent"].add_from_text(xml_as_text)
            self.content["sub_sitemaps"] += extract_sub_sitemaps(xml_as_text)

    def fetch(self, file_path: str = "", include_urls: bool = False) -> None:
        """Fetch a remote sitemap.

        If file_path does not end with "xml", candidate sitemap locations are
        obtained from search_sitemap(self.website_name) instead.

        Every candidate ending with "xml" replaces the parent document, so the
        last such candidate becomes the parent. Responses with a status code of
        400 or above are skipped without raising.

        Args:
            file_path (str, optional): Url Path of sitemap. Defaults to "".
            include_urls (bool, optional): If true then Urls present in the
                sitemap will be included. Defaults to False.
        """
        sitemaps = [file_path]
        if not file_path.endswith("xml"):
            sitemaps = search_sitemap(self.website_name)

        for sitemap in sitemaps:
            if sitemap.endswith("xml"):
                self.content["parent"] = XmlDocument(
                    sitemap, include_urls=include_urls
                )
                response = get_remote_content(sitemap)
                if response.status_code < 400:
                    self.xsl_file = extract_xsl_file(xml_as_text=response.text)
                    self.content["sub_sitemaps"] += extract_sub_sitemaps(
                        response.text, include_urls=include_urls
                    )

    def append(self, object_to_append) -> None:
        """Append an XmlDocument, Url or dict object.

        An XmlDocument is added to the list of sub-sitemaps. A Url or a dict is
        added to the parent document. Objects of any other type are ignored.

        Args:
            object_to_append (XmlDocument | Url | dict): Object to append. A
                dict must contain "loc" and may contain "lastmod" and
                "images_loc".

        Raises:
            KeyError: If a dict is given without a "loc" key.
        """
        if isinstance(object_to_append, XmlDocument):
            self.content["sub_sitemaps"].append(object_to_append)
        elif isinstance(object_to_append, Url):
            self.content["parent"].add_object(object_to_append)
        elif isinstance(object_to_append, dict):
            # Forward only the optional fields that were supplied, so a dict
            # holding just "loc" does not fail with a KeyError.
            # NOTE(review): assumes add_url defines defaults for these keyword
            # arguments - confirm against XmlDocument.add_url.
            optional_fields = {
                key: object_to_append[key]
                for key in ("lastmod", "images_loc")
                if key in object_to_append
            }
            self.content["parent"].add_url(
                object_to_append["loc"], **optional_fields
            )

    def as_dict(self) -> dict:
        """Return the Sitemap object as a dict.

        Returns:
            dict: contains 'parent', 'xsl-file' and 'sub_sitemaps'
        """
        return {
            "parent": self.content["parent"].as_dict(),
            "xsl-file": self.xsl_file,
            "sub_sitemaps": [
                sub_sitemap.as_dict() for sub_sitemap in self.content["sub_sitemaps"]
            ],
        }

    def write(
        self,
        path: str = "",
    ) -> None:
        """Write the Sitemap to xml file(s).

        Each sub-sitemap that has urls is written to its own file. If there are
        sub-sitemaps, the parent is written as an index of them; otherwise the
        parent's own urls are written, if it has any. File names are taken from
        the last "/"-separated segment of each document's "loc".

        Args:
            path (str, optional): specify output path/folder location (without
                file name). Defaults to "".
        """
        parent_sitemap = self.content["parent"]
        sub_sitemaps = self.content["sub_sitemaps"]

        # Serialize every sub-sitemap once and reuse the result for the index.
        sub_sitemap_dicts = []
        for sub_sitemap in sub_sitemaps:
            sub_dict = sub_sitemap.as_dict()
            sub_sitemap_dicts.append(sub_dict)
            sitemap_name = sub_dict["loc"].split("/")[-1]
            url_set = sub_dict["urls"]
            if url_set:
                write_sub_sitemap(
                    url_set,
                    self.website_name,
                    self.xsl_file,
                    path=path,
                    file_name=sitemap_name,
                )

        if sub_sitemaps and parent_sitemap:
            sitemap_name = parent_sitemap.as_dict()["loc"].split("/")[-1]
            # sub_sitemaps is non-empty here, so the index always has entries.
            sub_sitemaps_set = [
                {"loc": sub_dict["loc"], "lastmod": sub_dict["lastmod"]}
                for sub_dict in sub_sitemap_dicts
            ]
            write_index_sitemap(
                sub_sitemaps_set,
                self.website_name,
                self.xsl_file,
                path=path,
                file_name=sitemap_name,
            )
        elif parent_sitemap:
            parent_dict = parent_sitemap.as_dict()
            sitemap_name = parent_dict["loc"].split("/")[-1]
            url_set = parent_dict["urls"]
            if url_set:
                write_sub_sitemap(
                    url_set,
                    self.website_name,
                    self.xsl_file,
                    path=path,
                    file_name=sitemap_name,
                )

__init__(website_name=None, file_path='', xsl_file='')

Initialize Sitemap object.

Parameters:

Name Type Description Default
website_name str

Website name. Defaults to None, but a non-empty value is required.

None
file_path str

Path of Sitemap.xml. Defaults to “”.

''
xsl_file str

Path of xsl_file. Defaults to “”.

''
Source code in pysitemaps/__init__.py
def __init__(
    self,
    website_name: str = None,
    file_path: str = "",
    xsl_file: str = "",
) -> None:
    """Initialize Sitemap object.

    Args:
        website_name (str, optional): Website name. Defaults to None, but a
            non-empty value is required.
        file_path (str, optional): Path of Sitemap.xml. Defaults to "".
        xsl_file (str, optional): Path of xsl_file. Defaults to "".

    Raises:
        ValueError: If website_name is None or empty.
    """
    # Raising a plain string is not valid in Python 3; raise a real exception
    # so callers see the intended message.
    if not website_name:
        raise ValueError(
            "Cannot Create Sitemap object. Please add sitename to the argument"
        )
    self.website_name = website_name

    self.xsl_file = xsl_file
    self.file_path = file_path

    self.content = {
        "parent": XmlDocument(file_path),
        "sub_sitemaps": [],
    }

append(object_to_append)

Append any of XmlDocument, Url, dict Object

Parameters:

Name Type Description Default
object_to_append XmlDocument | Url | dict

Object to append to the current Sitemap.

required
Source code in pysitemaps/__init__.py
def append(self, object_to_append) -> None:
    """Append an XmlDocument, Url or dict object.

    An XmlDocument is added to the list of sub-sitemaps. A Url or a dict is
    added to the parent document. Objects of any other type are ignored.

    Args:
        object_to_append (XmlDocument | Url | dict): Object to append. A dict
            must contain "loc" and may contain "lastmod" and "images_loc".

    Raises:
        KeyError: If a dict is given without a "loc" key.
    """
    if isinstance(object_to_append, XmlDocument):
        self.content["sub_sitemaps"].append(object_to_append)
    elif isinstance(object_to_append, Url):
        self.content["parent"].add_object(object_to_append)
    elif isinstance(object_to_append, dict):
        # Forward only the optional fields that were supplied, so a dict
        # holding just "loc" does not fail with a KeyError.
        # NOTE(review): assumes add_url defines defaults for these keyword
        # arguments - confirm against XmlDocument.add_url.
        optional_fields = {
            key: object_to_append[key]
            for key in ("lastmod", "images_loc")
            if key in object_to_append
        }
        self.content["parent"].add_url(object_to_append["loc"], **optional_fields)

as_dict()

Return the Sitemap object as a dict.

Returns:

Name Type Description
dict dict

contains ‘parent’, ‘xsl-file’ and ‘sub_sitemaps’

Source code in pysitemaps/__init__.py
def as_dict(self) -> dict:
    """Serialize the Sitemap into a plain dictionary.

    Returns:
        dict: has the keys 'parent', 'xsl-file' and 'sub_sitemaps', where
            'sub_sitemaps' is a list with one entry per sub-sitemap.
    """
    # Keys are filled in the order they appear in the result.
    result = {"parent": self.content["parent"].as_dict()}
    result["xsl-file"] = self.xsl_file

    serialized_children = []
    for child in self.content["sub_sitemaps"]:
        serialized_children.append(child.as_dict())
    result["sub_sitemaps"] = serialized_children

    return result

fetch(file_path='', include_urls=False)

fetch remote sitemap.

If the file name is not specified, the function will locate it by browsing the website.

Parameters:

Name Type Description Default
file_path str

Url Path of sitemap. Defaults to “”.

''
include_urls bool

If true then Urls present in the sitemap will be included. Defaults to False.

False
Source code in pysitemaps/__init__.py
def fetch(self, file_path: str = "", include_urls: bool = False) -> None:
    """Load the sitemap from a remote location.

    When file_path does not end with "xml", the candidate locations come from
    search_sitemap(self.website_name) instead. Every candidate ending with
    "xml" replaces the parent document; candidates whose response has a status
    code of 400 or above contribute nothing further.

    Args:
        file_path (str, optional): Url Path of sitemap. Defaults to "".
        include_urls (bool, optional): If true then Urls present in the sitemap
            will be included. Defaults to False.
    """
    if file_path.endswith("xml"):
        candidates = [file_path]
    else:
        candidates = search_sitemap(self.website_name)

    for candidate in candidates:
        if not candidate.endswith("xml"):
            continue

        # The parent is replaced before the download is attempted.
        self.content["parent"] = XmlDocument(candidate, include_urls=include_urls)

        response = get_remote_content(candidate)
        succeeded = response.status_code < 400
        if not succeeded:
            continue

        self.xsl_file = extract_xsl_file(xml_as_text=response.text)
        discovered = extract_sub_sitemaps(response.text, include_urls=include_urls)
        self.content["sub_sitemaps"] += discovered

read(file_path='')

Read sitemap from local file_path

If not specified, the file_path given when the Sitemap object was created is used.

Parameters:

Name Type Description Default
file_path str

Sitemap file path. Defaults to “”.

''
Source code in pysitemaps/__init__.py
def read(self, file_path: str = "") -> None:
    """Read sitemap from a local file_path.

    If file_path is not specified, the file_path given when the Sitemap object
    was created is used. Paths that do not end with "xml" are ignored and
    leave the object unchanged.

    Args:
        file_path (str, optional): Sitemap file path. Defaults to "".
    """
    if not file_path:
        file_path = self.file_path

    if file_path.endswith("xml"):
        # The sitemap protocol requires UTF-8, so do not depend on the
        # platform's locale encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            xml_as_text = f.read()
        self.xsl_file = extract_xsl_file(xml_as_text=xml_as_text)
        self.content["parent"] = XmlDocument(file_path)
        self.content["parent"].add_from_text(xml_as_text)
        self.content["sub_sitemaps"] += extract_sub_sitemaps(xml_as_text)

write(path='')

Write the Sitemap to an XML file.

Parameters:

Name Type Description Default
path str

specify output path/folder location (without file name). Defaults to “”.

''
Source code in pysitemaps/__init__.py
def write(
    self,
    path: str = "",
) -> None:
    """Write the Sitemap to xml file(s).

    Each sub-sitemap that has urls is written to its own file. If there are
    sub-sitemaps, the parent is written as an index of them; otherwise the
    parent's own urls are written, if it has any. File names are taken from the
    last "/"-separated segment of each document's "loc".

    Args:
        path (str, optional): specify output path/folder location (without
            file name). Defaults to "".
    """
    parent_sitemap = self.content["parent"]
    sub_sitemaps = self.content["sub_sitemaps"]

    # Serialize every sub-sitemap once and reuse the result for the index.
    sub_sitemap_dicts = []
    for sub_sitemap in sub_sitemaps:
        sub_dict = sub_sitemap.as_dict()
        sub_sitemap_dicts.append(sub_dict)
        sitemap_name = sub_dict["loc"].split("/")[-1]
        url_set = sub_dict["urls"]
        if url_set:
            write_sub_sitemap(
                url_set,
                self.website_name,
                self.xsl_file,
                path=path,
                file_name=sitemap_name,
            )

    if sub_sitemaps and parent_sitemap:
        sitemap_name = parent_sitemap.as_dict()["loc"].split("/")[-1]
        # sub_sitemaps is non-empty here, so the index always has entries.
        sub_sitemaps_set = [
            {"loc": sub_dict["loc"], "lastmod": sub_dict["lastmod"]}
            for sub_dict in sub_sitemap_dicts
        ]
        write_index_sitemap(
            sub_sitemaps_set,
            self.website_name,
            self.xsl_file,
            path=path,
            file_name=sitemap_name,
        )
    elif parent_sitemap:
        parent_dict = parent_sitemap.as_dict()
        sitemap_name = parent_dict["loc"].split("/")[-1]
        url_set = parent_dict["urls"]
        if url_set:
            write_sub_sitemap(
                url_set,
                self.website_name,
                self.xsl_file,
                path=path,
                file_name=sitemap_name,
            )