Sitemap Class¶
This class provides methods to read a website's sitemap from a local file or to fetch it from a remote location. Use the write method to save the sitemap to any file location.
Source code in pysitemaps/__init__.py
class Sitemap:
    """Represent a website sitemap: one parent document plus its sub-sitemaps.

    The sitemap can be loaded from a local file (``read``) or from a remote
    location (``fetch``), extended with ``append`` and saved with ``write``.
    """

    def __init__(
        self,
        website_name: str = None,
        file_path: str = "",
        xsl_file: str = "",
    ) -> None:
        """Initialize the Sitemap object.

        Args:
            website_name (str, optional): Website name. The default is None,
                but a non-empty value is required.
            file_path (str, optional): Path of Sitemap.xml. Defaults to "".
            xsl_file (str, optional): Path of xsl_file. Defaults to "".

        Raises:
            ValueError: If ``website_name`` is empty or None.
        """
        if not website_name:
            # A bare string cannot be raised in Python 3; use a real exception
            # so the message actually reaches the caller.
            raise ValueError(
                "Cannot Create Sitemap object. Please add sitename to the argument"
            )
        self.website_name = website_name
        self.xsl_file = xsl_file
        self.file_path = file_path
        self.content = {
            "parent": XmlDocument(file_path),
            "sub_sitemaps": [],
        }

    def read(self, file_path: str = "") -> None:
        """Read a sitemap from a local XML file.

        When ``file_path`` is empty, the path given at construction time is
        used. Paths that do not end in "xml" are ignored.

        Args:
            file_path (str, optional): Sitemap file path. Defaults to "".
        """
        if not file_path:
            file_path = self.file_path
        if not file_path.endswith("xml"):
            return
        # Decode explicitly as UTF-8 (the XML default) instead of relying on
        # the platform locale.
        with open(file_path, "r", encoding="utf-8") as f:
            xml_as_text = f.read()
        self.xsl_file = extract_xsl_file(xml_as_text=xml_as_text)
        self.content["parent"] = XmlDocument(file_path)
        self.content["parent"].add_from_text(xml_as_text)
        self.content["sub_sitemaps"] += extract_sub_sitemaps(xml_as_text)

    def fetch(self, file_path: str = "", include_urls: bool = False) -> None:
        """Fetch a remote sitemap.

        When ``file_path`` does not end in "xml", candidate sitemaps are
        obtained from ``search_sitemap`` for this website instead.

        Args:
            file_path (str, optional): Url path of the sitemap. Defaults to "".
            include_urls (bool, optional): If true, Urls present in the
                sitemap will be included. Defaults to False.
        """
        if file_path.endswith("xml"):
            candidates = [file_path]
        else:
            candidates = search_sitemap(self.website_name)

        for candidate in candidates:
            if not candidate.endswith("xml"):
                continue
            # The parent is replaced before the download is attempted.
            self.content["parent"] = XmlDocument(
                candidate, include_urls=include_urls
            )
            response = get_remote_content(candidate)
            if response.status_code >= 400:
                continue
            self.xsl_file = extract_xsl_file(xml_as_text=response.text)
            self.content["sub_sitemaps"] += extract_sub_sitemaps(
                response.text, include_urls=include_urls
            )

    def append(self, object_to_append) -> None:
        """Append an XmlDocument, Url or dict to the sitemap.

        An ``XmlDocument`` is added as a sub-sitemap; a ``Url`` or a dict is
        added to the parent document. Other types are ignored.

        Args:
            object_to_append (XmlDocument | Url | dict): Item to append. A
                dict must contain "loc"; "lastmod" and "images_loc" are
                forwarded only when present.
        """
        if isinstance(object_to_append, XmlDocument):
            self.content["sub_sitemaps"].append(object_to_append)
        elif isinstance(object_to_append, Url):
            self.content["parent"].add_object(object_to_append)
        elif isinstance(object_to_append, dict):
            # Only "loc" is mandatory for a sitemap entry; omit absent keys
            # rather than failing with KeyError.
            # NOTE(review): assumes add_url has defaults for both keywords.
            optional = {
                key: object_to_append[key]
                for key in ("lastmod", "images_loc")
                if key in object_to_append
            }
            self.content["parent"].add_url(object_to_append["loc"], **optional)

    def as_dict(self) -> dict:
        """Return the Sitemap object as a dict.

        Returns:
            dict: contains 'parent', 'xsl-file' and 'sub_sitemaps'
        """
        parent_as_dict = self.content["parent"].as_dict()
        subs_as_dicts = [doc.as_dict() for doc in self.content["sub_sitemaps"]]
        return {
            "parent": parent_as_dict,
            "xsl-file": self.xsl_file,
            "sub_sitemaps": subs_as_dicts,
        }

    def write(
        self,
        path: str = "",
    ) -> None:
        """Write the sitemap to xml file(s).

        Every sub-sitemap with urls is written to its own file. If there are
        sub-sitemaps, the parent is written as an index of them; otherwise the
        parent's own urls are written.

        Args:
            path (str, optional): Output path/folder location (without file
                name). Defaults to "".
        """
        parent_sitemap = self.content["parent"]
        sub_sitemaps = self.content["sub_sitemaps"]

        # Snapshot each sub-sitemap once; the same dicts feed both the
        # per-file output and the index entries.
        sub_dicts = [sub_sitemap.as_dict() for sub_sitemap in sub_sitemaps]
        for sub_dict in sub_dicts:
            sitemap_name = sub_dict["loc"].split("/")[-1]
            url_set = sub_dict["urls"]
            if url_set:
                write_sub_sitemap(
                    url_set,
                    self.website_name,
                    self.xsl_file,
                    path=path,
                    file_name=sitemap_name,
                )

        if not parent_sitemap:
            return

        parent_dict = parent_sitemap.as_dict()
        sitemap_name = parent_dict["loc"].split("/")[-1]
        if sub_sitemaps:
            write_index_sitemap(
                [
                    {"loc": sub_dict["loc"], "lastmod": sub_dict["lastmod"]}
                    for sub_dict in sub_dicts
                ],
                self.website_name,
                self.xsl_file,
                path=path,
                file_name=sitemap_name,
            )
        else:
            url_set = parent_dict["urls"]
            if url_set:
                write_sub_sitemap(
                    url_set,
                    self.website_name,
                    self.xsl_file,
                    path=path,
                    file_name=sitemap_name,
                )
__init__(website_name=None, file_path='', xsl_file='')
¶
Initialize Sitemap Object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
website_name |
str
|
Website name. Defaults to None. |
None
|
file_path |
str
|
Path of Sitemap.xml. Defaults to “”. |
''
|
xsl_file |
str
|
Path of xsl_file. Defaults to “”. |
''
|
Source code in pysitemaps/__init__.py
def __init__(
self,
website_name: str = None,
file_path: str = "",
xsl_file: str = "",
) -> None:
"""Initlaize Sitemap Object
Args:
website_name (str, optional): Webiste Name. Defaults to None.
file_path (str, optional): Path of Sitemap.xml. Defaults to "".
xsl_file (str, optional): Path of xsl_file. Defaults to "".
"""
if website_name:
self.website_name = website_name
else:
raise "Cannot Create Sitemap object. Please add sitename to the argument"
self.xsl_file = xsl_file
self.file_path = file_path
self.content = {
"parent": XmlDocument(file_path),
"sub_sitemaps": [],
}
append(object_to_append)
¶
Append any of XmlDocument, Url, dict Object
Parameters:
Name | Type | Description | Default |
---|---|---|---|
object_to_append |
XmlDocument | Url | dict
|
append Url to current Sitemap |
required |
Source code in pysitemaps/__init__.py
def append(self, object_to_append) -> None:
    """Append an XmlDocument, Url or dict to the sitemap.

    An ``XmlDocument`` is added as a sub-sitemap; a ``Url`` or a dict is
    added to the parent document. Other types are ignored.

    Args:
        object_to_append (XmlDocument | Url | dict): Item to append. A dict
            must contain "loc"; "lastmod" and "images_loc" are forwarded
            only when present.
    """
    if isinstance(object_to_append, XmlDocument):
        self.content["sub_sitemaps"].append(object_to_append)
    elif isinstance(object_to_append, Url):
        self.content["parent"].add_object(object_to_append)
    elif isinstance(object_to_append, dict):
        # Only "loc" is mandatory for a sitemap entry; omit absent keys
        # rather than failing with KeyError.
        # NOTE(review): assumes add_url has defaults for both keywords.
        optional = {
            key: object_to_append[key]
            for key in ("lastmod", "images_loc")
            if key in object_to_append
        }
        self.content["parent"].add_url(object_to_append["loc"], **optional)
as_dict()
¶
Return the Sitemap object as a dict.
Returns:
Name | Type | Description |
---|---|---|
dict |
dict
|
contains ‘parent’, ‘xsl-file’ and ‘sub_sitemaps’ |
Source code in pysitemaps/__init__.py
def as_dict(self) -> dict:
"""return Stimeap object as dict.
Returns:
dict: contains 'parent', 'xsl-file' and 'sub_sitemaps'
"""
return {
"parent": self.content["parent"].as_dict(),
"xsl-file": self.xsl_file,
"sub_sitemaps": [
sub_sitemap.as_dict() for sub_sitemap in self.content["sub_sitemaps"]
],
}
fetch(file_path='', include_urls=False)
¶
fetch remote sitemap.
If the file name is not specified, the function will locate it by browsing the website.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_path |
str
|
Url Path of sitemap. Defaults to “”. |
''
|
include_urls |
bool
|
If true then Urls present in the sitemap will be included. Defaults to False. |
False
|
Source code in pysitemaps/__init__.py
def fetch(self, file_path: str = "", include_urls: bool = False) -> None:
    """Fetch a remote sitemap.

    When ``file_path`` does not end in "xml", candidate sitemaps are
    obtained from ``search_sitemap`` for this website instead.

    Args:
        file_path (str, optional): Url path of the sitemap. Defaults to "".
        include_urls (bool, optional): If true, Urls present in the sitemap
            will be included. Defaults to False.
    """
    if file_path.endswith("xml"):
        candidates = [file_path]
    else:
        candidates = search_sitemap(self.website_name)

    for candidate in candidates:
        if not candidate.endswith("xml"):
            continue
        # The parent is replaced before the download is attempted.
        self.content["parent"] = XmlDocument(
            candidate, include_urls=include_urls
        )
        response = get_remote_content(candidate)
        if response.status_code >= 400:
            continue
        self.xsl_file = extract_xsl_file(xml_as_text=response.text)
        self.content["sub_sitemaps"] += extract_sub_sitemaps(
            response.text, include_urls=include_urls
        )
read(file_path='')
¶
Read sitemap from local file_path
If not specified, the file_path given when the Sitemap object was created is used.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_path |
str
|
Sitemap file path. Defaults to “”. |
''
|
Source code in pysitemaps/__init__.py
def read(self, file_path: str = "") -> None:
"""Read sitemap from local file_path
If not specified then file_path specified at the time creation of Sitemap objet.
Args:
file_path (str, optional): Sitemap file path. Defaults to "".
"""
if not file_path:
file_path = self.file_path
if file_path.endswith("xml"):
with open(file_path, "r") as f:
xml_as_text = f.read()
self.xsl_file = extract_xsl_file(xml_as_text=xml_as_text)
self.content["parent"] = XmlDocument(file_path)
self.content["parent"].add_from_text(xml_as_text)
self.content["sub_sitemaps"] += extract_sub_sitemaps(xml_as_text)
write(path='')
¶
write Sitemap to xml file
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str
|
specify output path/folder location (without file name). Defaults to “”. |
''
|
Source code in pysitemaps/__init__.py
def write(
self,
path: str = "",
) -> None:
"""write Sitemap to xml file
Args:
path (str, optional): specify output path/folder location (without file name). Defaults to "".
"""
parent_sitemap = self.content["parent"]
sub_sitemaps = self.content["sub_sitemaps"]
for sub_sitemap in sub_sitemaps:
sitemap_name = sub_sitemap.as_dict()["loc"].split("/")[-1]
url_set = sub_sitemap.as_dict()["urls"]
if url_set:
write_sub_sitemap(
url_set,
self.website_name,
self.xsl_file,
path=path,
file_name=sitemap_name,
)
if sub_sitemaps and parent_sitemap:
sitemap_name = parent_sitemap.as_dict()["loc"].split("/")[-1]
sub_sitemaps_set = [
{"loc": item.as_dict()["loc"], "lastmod": item.as_dict()["lastmod"]}
for item in self.content["sub_sitemaps"]
]
if sub_sitemaps_set:
write_index_sitemap(
sub_sitemaps_set,
self.website_name,
self.xsl_file,
path=path,
file_name=sitemap_name,
)
elif parent_sitemap:
sitemap_name = parent_sitemap.as_dict()["loc"].split("/")[-1]
url_set = parent_sitemap.as_dict()["urls"]
if url_set:
write_sub_sitemap(
url_set,
self.website_name,
self.xsl_file,
path=path,
file_name=sitemap_name,
)