Skip to content
Snippets Groups Projects
Unverified Commit 4079020d authored by frasergr's avatar frasergr Committed by GitHub
Browse files

dockerfile cleanup; enforce text LF line endings (#81)

parent 3945a772
No related branches found
No related tags found
No related merge requests found
* text=auto eol=lf
\ No newline at end of file
import requests import requests
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from scripts.link import parse_links from scripts.link import parse_links
import re import re
def parse_sitemap(url):
    """Fetch a sitemap and return the page URLs it lists.

    Downloads the XML document at *url*, walks every ``<url>`` element in
    the sitemaps.org 0.9 namespace, and collects each nested ``<loc>``
    value that ``has_extension_to_ignore`` does not reject. Rejected
    locations are reported on stdout.

    Args:
        url: Address of the sitemap XML document.

    Returns:
        A list of URL strings in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx instead of trying to parse an error page as XML.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    urls = []
    for element in root.iter(f'{namespace}url'):
        for loc in element.iter(f'{namespace}loc'):
            # An empty <loc/> has text None; pretty-printed sitemaps often
            # wrap the URL in whitespace.
            location = (loc.text or '').strip()
            if not location:
                continue
            if has_extension_to_ignore(location):
                print(f"Skipping filetype: {location}")
            else:
                urls.append(location)
    return urls
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml # Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
def sitemap():
    """Prompt for a sitemap URL and pass every page it lists to ``parse_links``.

    Reads the URL from stdin. If nothing (or only whitespace) is entered,
    prints an error and terminates the process with exit status 1.

    Raises:
        SystemExit: With code 1 when no URL is provided.
    """
    sitemap_url = input("Enter the URL of the sitemap: ").strip()
    if not sitemap_url:
        print("No valid sitemap provided!")
        # The bare exit() helper is injected by the `site` module and is not
        # guaranteed to exist (e.g. `python -S`); raising SystemExit always is.
        raise SystemExit(1)

    url_array = parse_sitemap(sitemap_url)
    # Parse links from the collected URLs.
    parse_links(url_array)
def has_extension_to_ignore(string):
    """Return True when a URL points at a file type that should be skipped.

    Only the path component of the URL is inspected, case-insensitively, so
    query strings and fragments are ignored and an extension-like label in
    the host name (``files.pdf.example.com``) or in a parent directory does
    not cause a false positive.

    Args:
        string: The URL to test. ``None`` or an empty string yields False.

    Returns:
        True if the URL path ends in one of the ignored extensions.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlsplit

    ignored_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf')
    if not string:
        return False
    path = urlsplit(string).path.lower()
    # str.endswith accepts a tuple, so one call covers every extension.
    return path.endswith(ignored_extensions)
\ No newline at end of file
...@@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \ ...@@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \
# Copy docker helper scripts # Copy docker helper scripts
COPY ./docker/docker-entrypoint.sh /usr/local/bin/ COPY ./docker/docker-entrypoint.sh /usr/local/bin/
COPY ./docker/docker-healthcheck.sh /usr/local/bin/ COPY ./docker/docker-healthcheck.sh /usr/local/bin/
COPY ./docker/dual_boot.sh /usr/local/bin/
# Ensure the scripts are executable # Ensure the scripts are executable
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \ RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
chmod +x /usr/local/bin/docker-healthcheck.sh && \ chmod +x /usr/local/bin/docker-healthcheck.sh
chmod 777 /usr/local/bin/dual_boot.sh
USER anythingllm USER anythingllm
...@@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \ ...@@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1 CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
# Run the server # Run the server
ENTRYPOINT ["docker-entrypoint.sh"] ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]
\ No newline at end of file
CMD /bin/bash /usr/local/bin/dual_boot.sh
\ No newline at end of file
#!/usr/bin/env bash #!/bin/bash
node /app/server/index.js &
exec "$@" { FLASK_ENV=production FLASK_APP=wsgi.py cd collector && gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
\ No newline at end of file wait -n
exit $?
\ No newline at end of file
#!/bin/bash
# Boot the Node server and the gunicorn-served collector API side by side,
# then exit as soon as either of them stops.
node /app/server/index.js &
# The variable assignments must prefix gunicorn itself: written in front of
# `cd` they apply only to that builtin and never reach the gunicorn process.
{ cd collector && FLASK_ENV=production FLASK_APP=wsgi.py gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
# `wait -n` returns when the first background job finishes; pass its status on
# as this script's exit code.
wait -n
exit $?
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment