创建一个工具,从IT类网站收集包含特定关键词的文章,并发送到Teams
目的
作为工程师,总希望拓宽获取知识的渠道。这次我按指定的关键词从IT类网站收集文章,并发送到Teams。下一步希望能先对文章内容进行摘要再发送,让这个工具更加完善。
系统配置
分析目标网站及其搜索查询的URL格式。
这次的测试目标仅限于Qiita。
-
- qiita
https://qiita.com/search?q=___KEYWORD____
代码主体
import json
import os
import random
import re
import time
import urllib.request
from urllib import request
from urllib.parse import quote, urljoin

import openai
import pandas as pd
import pymsteams
import requests
from bs4 import BeautifulSoup
# Webhook URL handed to pymsteams.connectorcard(); read from the
# TEAMS_WEB_HOOK_URL environment variable. A missing variable raises
# KeyError right here, before any article is collected.
teamsapi = os.environ["TEAMS_WEB_HOOK_URL"]
def qiita(keyword):
    """Search Qiita for *keyword* and post each hit's title and URL to Teams.

    Ten consecutive search-result pages are scanned, starting at a random
    page between 1 and 50. Every link matching ``a[href].style-1lvpob1`` is
    collected (duplicates are dropped), then each article is fetched, its
    ``<title>`` is printed and passed to ``teamssend`` together with the
    article URL. The function pauses 20 seconds after every article.

    Result pages or articles that cannot be fetched are reported on stdout
    and skipped, so a single failing request does not abort the whole run.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the query string.
    """
    search_url = "https://qiita.com/search?q="
    first_page = random.randint(1, 50)
    article_urls = []

    for page in range(first_page, first_page + 10):
        # Percent-encode so keywords with spaces or non-ASCII text stay valid.
        page_url = (
            search_url
            + quote(str(keyword), safe="")
            + "&sort=rel&stocked=&page="
            + str(page)
        )
        try:
            # `with` closes the response even if parsing raises.
            with request.urlopen(page_url) as response:
                soup = BeautifulSoup(response, 'html.parser')
        except OSError as err:  # URLError/HTTPError are OSError subclasses
            print(f"skip {page_url}: {err}")
            continue
        for tag in soup.select('a[href].style-1lvpob1'):
            # urljoin avoids the "//" that plain concatenation produces when
            # the href already starts with "/".
            article_url = urljoin("https://qiita.com/", str(tag.get('href')))
            if article_url not in article_urls:
                article_urls.append(article_url)

    for article_url in article_urls:
        try:
            with request.urlopen(article_url) as response:
                soup = BeautifulSoup(response, 'html.parser')
        except OSError as err:
            print(f"skip {article_url}: {err}")
        else:
            title_tag = soup.find("title")
            # Fall back to the URL when the page has no <title> element.
            title = title_tag.text if title_tag is not None else article_url
            print(title)
            teamssend(title, article_url)
        # Keep the original 20-second pause after every article.
        time.sleep(20)
def zenn(keyword):
    """Fetch Zenn search-result pages for *keyword* (work in progress).

    Ten consecutive result pages are requested, starting at a random page
    between 1 and 5, with a browser-like User-Agent header. Each page URL
    and the parsed HTML are printed for inspection.

    Link extraction is not implemented yet, so ``article_urls`` stays empty
    and the sending loop at the end currently has nothing to post.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the query string.
    """
    search_url = "https://zenn.dev/search"
    first_page = random.randint(1, 5)
    article_urls = []
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0"
    }

    for page in range(first_page, first_page + 10):
        page_url = (
            search_url + "?q=" + quote(str(keyword), safe="") + "&page=" + str(page)
        )
        print(page_url)
        # Named `req`, not `request`: binding `request` locally would shadow
        # the imported urllib `request` module for the whole function and
        # break the article loop below.
        req = urllib.request.Request(url=page_url, headers=headers)
        try:
            with urllib.request.urlopen(req) as response:
                soup = BeautifulSoup(response, 'html.parser')
        except OSError as err:  # URLError/HTTPError are OSError subclasses
            print(f"skip {page_url}: {err}")
            continue
        print(soup)
        # TODO: extract article links from `soup` and append them to
        # article_urls, e.g. "https://zenn.dev" + href for each matching <a>.

    for article_url in article_urls:
        try:
            with urllib.request.urlopen(article_url) as response:
                soup = BeautifulSoup(response, 'html.parser')
        except OSError as err:
            print(f"skip {article_url}: {err}")
        else:
            title_tag = soup.find("title")
            # Fall back to the URL when the page has no <title> element.
            title = title_tag.text if title_tag is not None else article_url
            print(title)
            teamssend(title, article_url)
        # Same 20-second pause after every article as in the original.
        time.sleep(20)
def teamssend(title, text):
    """Send one connector card to the Teams webhook.

    Args:
        title: Card title.
        text: Card body.
    """
    card = pymsteams.connectorcard(teamsapi)
    card.title(title)
    card.text(text)
    card.send()
def insert():
    """Run the Qiita collector once per keyword, visiting keywords in random order."""
    keywords = [
        "kubevirt", "localstack", "awscli", "prometheus", "iac", "terraform",
        "openstack", "kubernetes", "docker", "aws", "cloud", "container",
        "ubuntu", "packet", "network", "docker-compose", "packer", "SSO",
        "S3", "monitoring", "vpn", "git", "github",
    ]
    # Sampling the full length yields a random permutation of the list.
    shuffled = random.sample(keywords, len(keywords))
    for word in shuffled:
        print(word)
        qiita(word)
        # Zenn collection is currently disabled:
        # zenn(word)
# Top-level call: collection starts as soon as this file is executed.
insert()
创建服务
[Unit]
Description=Collect IT articles by keyword and post them to Teams
Documentation=
# The collector only makes outbound HTTP requests, so start it once the
# network is up.
Wants=network-online.target
After=network-online.target
[Service]
Type=simple
# NOTE(review): nothing in the script appears to need root; consider running
# it as an unprivileged user.
User=root
Group=root
TimeoutStartSec=0
Restart=on-failure
RestartSec=30s
#ExecStartPre=
ExecStart=/home/shoma/ainews/ainews.sh
# Log under the service's own name (was "Diskutilization", left over from
# another unit).
SyslogIdentifier=ainews
#ExecStop=
[Install]
# Without a WantedBy= target, `systemctl enable` has nothing to hook the
# unit into and it would never start at boot.
WantedBy=multi-user.target
编写Shell脚本。
#!/bin/bash
# Runs the article collector, then sleeps 6 hours (21600 s), forever.
#
# The Teams webhook URL is a credential, so it is not hard-coded here.
# Either provide TEAMS_WEB_HOOK_URL in the environment, or put a line
#   TEAMS_WEB_HOOK_URL="https://..."
# into the env file below and make it readable only by the service user.
# NOTE(review): the URL that used to be embedded in this script should be
# treated as leaked and regenerated in Teams.
ENV_FILE="${AINEWS_ENV_FILE:-/home/shoma/ainews/ainews.env}"
if [ -z "${TEAMS_WEB_HOOK_URL:-}" ] && [ -r "$ENV_FILE" ]; then
    # shellcheck source=/dev/null
    . "$ENV_FILE"
fi
# Abort early with a clear message when the URL is still missing.
: "${TEAMS_WEB_HOOK_URL:?set TEAMS_WEB_HOOK_URL or create $ENV_FILE}"
export TEAMS_WEB_HOOK_URL
while true
do
    python3 /home/shoma/ainews/news.py
    sleep 21600
done
说明
根据查询分析的结果生成URL。
url="https://qiita.com/search?q="
pagerand = random.randint(1,50)
qurllist=[]
for page in range(10):
page = page + pagerand
mkurl=str(url) + str(keyword) + "&sort=rel&stocked=&page=" + str(page)
获取网站的HTML
response = request.urlopen(mkurl)
soup = BeautifulSoup(response, 'html.parser')
response.close()
提取class为style-1lvpob1且带有href属性的a标签元素。
由于提取到的href只是站内路径,需要在前面加上域名,才能作为完整的URL使用。
tag_list = soup.select('a[href].style-1lvpob1')
for tag in tag_list:
tagurl = tag.get('href')
qurl="https://qiita.com/" + str(tagurl)
qurllist.append(qurl)
获取提取的URL的HTML
for url in qurllist:
response = request.urlopen(url)
soup = BeautifulSoup(response, 'html.parser')
response.close()
获取文章标题
title=soup.find("title")
将标题和URL传给负责发送到Teams的函数
teamssend(title.text, url)
发送到Teams的函数
def teamssend(title, text):
    """Send one connector card to the Teams webhook.

    Args:
        title: Card title.
        text: Card body.
    """
    card = pymsteams.connectorcard(teamsapi)
    card.title(title)
    card.text(text)
    card.send()
指定关键字
keywordlist = ["kubevirt", "localstack", "awscli", "prometheus", "iac", "terraform", "openstack", "kubernetes", "docker", "aws", "cloud", "container","ubuntu", "packet", "network", "docker-compose", "packer", "SSO", "S3", "monitoring","vpn", "git", "github"]
按随机顺序依次将关键词传给函数。
for keyword in random.sample(keywordlist, len(keywordlist)):
print(keyword)
qiita(keyword)