From b5d3e6dda4be85f55e4d568c4a34d29475c64036 Mon Sep 17 00:00:00 2001
From: Ashwath7349 <116176439+Ashwath7349@users.noreply.github.com>
Date: Tue, 25 Apr 2023 09:23:31 +0530
Subject: [PATCH] Add files via upload

---
 Dockerfile              | 20 ++++++--------------
 connect.py              | 34 ++++++++++++++++++++++++++++++++++
 docker-compose.yml      |  9 +++++++++
 output2.csv             | 10 ++++++++++
 web_scrapping_sample.py | 12 ++++++++++++
 5 files changed, 71 insertions(+), 14 deletions(-)
 create mode 100644 connect.py
 create mode 100644 docker-compose.yml
 create mode 100644 output2.csv
 create mode 100644 web_scrapping_sample.py

diff --git a/Dockerfile b/Dockerfile
index f07ae2b..ff4dc78 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,9 @@
 FROM python:3.10.2-alpine3.15
-COPY . .
-# Install Postgres
-RUN apk update
-RUN apk add postgresql
-RUN chown postgres:postgres /run/postgresql/
-# Install requirements
-COPY ./requirements.txt /tmp
-RUN pip install -r /tmp/requirements.txt
-# For psycopg2
-RUN apk add --virtual postgresql-deps libpq-dev
-# Create directories
+# Create directories
 RUN mkdir -p /root/workspace/src
-# Mount your local file
-COPY ./web_scraping_sample.py /root/workspace/src
+COPY ./web_scrapping_sample.py /root/workspace/src
 # Switch to project directory
-WORKDIR /root/workspace/src
\ No newline at end of file
+WORKDIR /root/workspace/src
+# Install required packages
+RUN pip install --upgrade pip
+RUN pip install requests bs4 html5lib
diff --git a/connect.py b/connect.py
new file mode 100644
index 0000000..9437345
--- /dev/null
+++ b/connect.py
@@ -0,0 +1,34 @@
+"""Load the rows of output2.csv into the demotable PostgreSQL table."""
+import csv
+import os
+
+import psycopg2
+
+INSERT_SQL = "INSERT INTO demotable (column1, column2, column3) VALUES (%s, %s, %s)"
+
+# Connection settings can be overridden with the standard libpq environment
+# variables; the fallbacks are the values this script has always used.
+conn = psycopg2.connect(
+    host=os.environ.get("PGHOST", "172.17.0.2"),
+    port=os.environ.get("PGPORT", "5432"),
+    dbname=os.environ.get("PGDATABASE", "demodb"),
+    user=os.environ.get("PGUSER", "postgres"),
+    password=os.environ.get("PGPASSWORD", "123456"),
+)
+
+try:
+    # "with conn" commits when the block succeeds and rolls back if it raises;
+    # "with conn.cursor()" closes the cursor. Neither closes the connection.
+    with conn, conn.cursor() as cur:
+        # newline='' lets the csv module do its own line-ending handling.
+        with open('output2.csv', 'r', newline='') as f:
+            csv_reader = csv.reader(f)
+            next(csv_reader, None)  # skip the header row; tolerate an empty file
+            for row in csv_reader:
+                if not row:
+                    continue  # blank line in the file
+                # Pad short rows so a missing cell cannot raise IndexError.
+                col1, col2, col3 = (row + ["", "", ""])[:3]
+                cur.execute(INSERT_SQL, (col1, col2 or "N/A", col3))
+finally:
+    conn.close()  # always release the connection, even after a failure
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..15f0a8d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,9 @@
+psql-db:
+  image: 'postgres:14'
+  container_name: psql-db1
+  environment:
+    - PGPASSWORD=123456
+    - POSTGRES_USER=postgres
+    - POSTGRES_PASSWORD=123456
+  ports:
+    - '5434:5432'
diff --git a/output2.csv b/output2.csv
new file mode 100644
index 0000000..86f7d24
--- /dev/null
+++ b/output2.csv
@@ -0,0 +1,10 @@
+column 1,column 2,column 3
+"Major new features of the 3.12 series, compared to 3.11",And now for something completely different,Enjoy the new releases
+More resources,Enjoy the new releases,"Major new features of the 3.12 series, compared to 3.11"
+
,"Major new features of the 3.12 series, compared to 3.11",More resources +,More resources,
+,
,And now for something completely different +,And now for something completely different,
+,
,Enjoy the new releases
+,,"Major new features of the 3.12 series, compared to 3.11"
+,,"More resources"
diff --git a/web_scrapping_sample.py b/web_scrapping_sample.py
new file mode 100644
index 0000000..06b1147
--- /dev/null
+++ b/web_scrapping_sample.py
@@ -0,0 +1,12 @@
+"""Scrape the <h1> headings of blog.python.org into output2.csv."""
+import csv
+
+import requests
+from bs4 import BeautifulSoup
+res = requests.get("https://blog.python.org/", timeout=30)  # requests has no default timeout
+res.raise_for_status()  # stop on an HTTP error instead of scraping an error page
+titles = BeautifulSoup(res.content, "html.parser").find_all("h1")
+with open('output2.csv', 'w', newline='') as f:  # the with-block closes f on exit
+    writer = csv.writer(f)
+    writer.writerow(['column 1', 'column 2', 'column 3'])
+    writer.writerows(titles)  # iterating a Tag yields its child nodes, one cell each