diff --git a/Dockerfile b/Dockerfile
index f07ae2b..ff4dc78 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,9 @@
 FROM python:3.10.2-alpine3.15
-COPY . .
-# Install Postgres
-RUN apk update
-RUN apk add postgresql
-RUN chown postgres:postgres /run/postgresql/
-# Install requirements
-COPY ./requirements.txt /tmp
-RUN pip install -r /tmp/requirements.txt
-# For psycopg2
-RUN apk add --virtual postgresql-deps libpq-dev
-# Create directories
+# Create directories
 RUN mkdir -p /root/workspace/src
-# Mount your local file
-COPY ./web_scraping_sample.py /root/workspace/src
+COPY ./web_scrapping_sample.py /root/workspace/src
 # Switch to project directory
-WORKDIR /root/workspace/src
\ No newline at end of file
+WORKDIR /root/workspace/src
+# Install required packages
+RUN pip install --upgrade pip
+RUN pip install requests bs4 html5lib
diff --git a/connect.py b/connect.py
new file mode 100644
index 0000000..9437345
--- /dev/null
+++ b/connect.py
@@ -0,0 +1,56 @@
+"""Load the rows of output2.csv into the demotable table in PostgreSQL."""
+import csv
+import os
+
+import psycopg2
+
+CSV_PATH = "output2.csv"
+INSERT_SQL = (
+    "INSERT INTO demotable (column1, column2, column3) "
+    "VALUES (%s, %s, %s)"
+)
+
+
+def read_rows(path):
+    """Return (column1, column2, column3) tuples read from the CSV at path.
+
+    The header line is skipped and blank lines are ignored. Rows with fewer
+    than three fields are padded with empty strings, and an empty second
+    field is stored as "N/A".
+    """
+    rows = []
+    # newline="" lets the csv module handle line breaks inside quoted fields.
+    with open(path, newline="") as f:
+        reader = csv.reader(f)
+        next(reader, None)  # skip the header; an empty file yields no rows
+        for row in reader:
+            if not row:
+                continue
+            first, second, third = (row + ["", "", ""])[:3]
+            rows.append((first, second or "N/A", third))
+    return rows
+
+
+def main():
+    """Insert every CSV row into demotable in a single transaction."""
+    rows = read_rows(CSV_PATH)
+    # The standard libpq environment variables override the demo defaults so
+    # credentials do not have to be edited into the source.
+    conn = psycopg2.connect(
+        host=os.environ.get("PGHOST", "172.17.0.2"),
+        port=os.environ.get("PGPORT", "5432"),
+        dbname=os.environ.get("PGDATABASE", "demodb"),
+        user=os.environ.get("PGUSER", "postgres"),
+        password=os.environ.get("PGPASSWORD", "123456"),
+    )
+    try:
+        # "with conn" commits on success and rolls back on error; it does not
+        # close the connection, hence the finally clause.
+        with conn, conn.cursor() as cur:
+            cur.executemany(INSERT_SQL, rows)
+    finally:
+        conn.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..15f0a8d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,9 @@
+psql-db:
+  image: 'postgres:14'
+  container_name: psql-db1
+  environment:
+    - PGPASSWORD=123456
+    - POSTGRES_USER=postgres
+    - POSTGRES_PASSWORD=123456
+  ports:
+    - '5434:5432'
diff --git a/output2.csv b/output2.csv
new file mode 100644
index 0000000..86f7d24
--- /dev/null
+++ b/output2.csv
@@ -0,0 +1,10 @@
+column 1,column 2,column 3
+"Major new features of the 3.12 series, compared to 
3.11",And now for something completely different,Enjoy the new releases +More resources,Enjoy the new releases,"Major new features of the 3.12 series, compared to 3.11" +
,"Major new features of the 3.12 series, compared to 3.11",More resources +,More resources,
+,
,And now for something completely different +,And now for something completely different,
+,
,Enjoy the new releases
+,,"Major new features of the 3.12 series, compared to 3.11"
+,,"More resources"
diff --git a/web_scrapping_sample.py b/web_scrapping_sample.py
new file mode 100644
index 0000000..06b1147
--- /dev/null
+++ b/web_scrapping_sample.py
@@ -0,0 +1,39 @@
+"""Scrape the <h1> headings of blog.python.org into output2.csv."""
+import csv
+
+import requests
+from bs4 import BeautifulSoup
+
+URL = "https://blog.python.org/"
+CSV_PATH = "output2.csv"
+HEADER = ["column 1", "column 2", "column 3"]
+
+
+def fetch_titles(url):
+    """Return the stripped text of every <h1> element on the page at url."""
+    res = requests.get(url, timeout=30)
+    res.raise_for_status()  # never write an error page's headings to the CSV
+    soup = BeautifulSoup(res.content, "html.parser")
+    return [tag.get_text(strip=True) for tag in soup.find_all("h1")]
+
+
+def write_titles(path, titles):
+    """Write the header row and one row per title to the CSV at path.
+
+    Each title goes in the first column; the remaining columns are left
+    empty so that every row has as many fields as the header.
+    """
+    padding = [""] * (len(HEADER) - 1)
+    with open(path, "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(HEADER)
+        writer.writerows([title, *padding] for title in titles)
+
+
+def main():
+    """Fetch the blog headings and save them to output2.csv."""
+    write_titles(CSV_PATH, fetch_titles(URL))
+
+
+if __name__ == "__main__":
+    main()