soarli

简易Python爬虫实时监视校园网当月流量信息
1.0版本效果代码import requests import os import re import time ...
扫描右侧二维码阅读全文
26
2021/05

简易Python爬虫实时监视校园网当月流量信息

1.0版本

效果

代码

import requests
import os
import re
import time

# Poll the campus-network gateway forever and show this month's traffic on a
# single, continuously refreshed console line.
while 1:
    try:
        # Without a timeout a stalled gateway would block this loop forever.
        r = requests.get(url='http://10.0.108.3', timeout=10)
    except requests.RequestException as err:
        # Transient network failure: report it and try again on the next tick.
        print("request failed, retrying: " + str(err))
        time.sleep(0.3)
        continue

    # Keep a copy of the fetched page on disk. `with` guarantees the handle is
    # closed before the file is reopened for reading.
    with open('get.html', 'w+') as html:
        print(r.text, file=html)

    # Read forward so that `line` ends up holding the 13th line of the page,
    # which is where the script expects the flow counter.
    with open('get.html', 'r') as html:
        for _ in range(13):
            line = html.readline()

    # Capture the text between flow=' and ';f on that line.
    zz = re.findall(r"flow=\'(.*)\';f", line)
    if not zz:
        # The expected line did not contain the counter (different page
        # layout, error page, ...). Retry instead of dying with IndexError.
        print("flow value not found in get.html, retrying")
        time.sleep(0.3)
        continue

    ti_qu_de_jie_guo = zz[0]
    # The raw counter is divided by 1024 twice and reported as GB.
    jieguo = int(ti_qu_de_jie_guo)/1024/1024

    # end="\r" moves the cursor back to column 0 so the next reading
    # overwrites this one instead of scrolling the console.
    print("当前网络环境本月用了:"+ str(round(jieguo, 2)) + "GB流量!", end="\r")
    time.sleep(0.3)

1.5版本

5.29更新

27日晚上突发奇想,尝试将结果输出到网页上以实现校园网范围内都可以看到实时宿舍上网流量信息(貌似没太大用),但是这种前后端分离的思想倒是得到了一次充分的实践。

import requests
import os
import re
import time

def sc_html():
    """Write the auto-refreshing traffic page to the already-open ``index`` file.

    Takes no arguments; it reads two module-level names that the caller must
    have set beforehand:

    * ``index``  - a writable text file object that receives the page.
    * ``jieguo`` - the usage figure, shown rounded to two decimals.

    The page reloads itself every 5 seconds through a small inline script.
    """
    # Everything up to (and including) the icon inside the #info container.
    head_rows = (
        '<!DOCTYPE html>',
        '<html>',
        '<head>',
        '    <title>网络流量信息查看 - Powered by soarli</title>',
        '    <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=0">',
        '    <link rel="icon" href="soarli.ico">',
        '    <style type="text/css">',
        '        body{',
        '            background-image: url("bj.jpg");',
        '            background-repeat: repeat;',
        "            cursor:url('https://blog.soarli.top/usr/plugins/HoerMouse/static/image/dew/normal.cur'), auto;",
        '        }',
        '        #info{',
        '            text-align: center;',
        '            font-size: 24px;',
        '        }',
        '    </style>',
        '    <script language="JavaScript"> ',
        '    function myrefresh() ',
        '    { ',
        '    window.location.reload(); ',
        '    } ',
        "    setTimeout('myrefresh()',5000); ",
        '    </script> ',
        '</head>',
        '<body>',
        '<br><br><br><br>',
        '<div id="info">',
        '<img src="soarli.ico"><br><br>',
    )
    # Closing part of the page, emitted after the usage sentence.
    tail_rows = (
        '<br><br>本页面每5秒钟自动刷新一次...',
        '</div>',
        '</body>',
        '</html>',
    )

    for row in head_rows:
        print(row, file=index)

    # The only dynamic line: the usage figure is looked up here, after the
    # static head has been written.
    usage_text = str(round(jieguo, 2))
    print("我的网络环境本月用了:" + usage_text + "GB流量!", file=index)

    for row in tail_rows:
        print(row, file=index)
    
# Poll the campus-network gateway forever and regenerate index.html with the
# latest reading so the figure can be viewed as a web page.
while 1:
    try:
        # Without a timeout a stalled gateway would block this loop forever.
        r = requests.get(url='http://10.0.108.3', timeout=10)
    except requests.RequestException as err:
        # Transient network failure: keep the previous index.html and retry.
        print("request failed, retrying: " + str(err))
        time.sleep(0.3)
        continue

    # Keep a copy of the fetched page on disk. `with` guarantees the handle is
    # closed before the file is reopened for reading.
    with open('get.html', 'w+') as html:
        print(r.text, file=html)

    # Read forward so that `line` ends up holding the 13th line of the page,
    # which is where the script expects the flow counter.
    with open('get.html', 'r') as html:
        for _ in range(13):
            line = html.readline()

    # Capture the text between flow=' and ';f on that line.
    zz = re.findall(r"flow=\'(.*)\';f", line)
    if not zz:
        # The expected line did not contain the counter. Retry instead of
        # dying with IndexError; the previous index.html stays in place.
        print("flow value not found in get.html, retrying")
        time.sleep(0.3)
        continue

    ti_qu_de_jie_guo = zz[0]
    # The raw counter is divided by 1024 twice and reported as GB.
    jieguo = int(ti_qu_de_jie_guo)/1024/1024

    # sc_html() writes to the module-level name `index` and reads `jieguo`.
    # Open index.html only now, when the new value is ready, and close it
    # right after writing so the page on disk is complete and flushed.
    with open('index.html', 'w') as index:
        sc_html()
    time.sleep(0.3)

2.0版本

7.1更新:支持定时自动查询

每天早上6点开始,每隔6小时执行一次:
get_xyw_liuliang.xml

<?xml version="1.0" encoding="UTF-16"?>
<!-- Windows Task Scheduler task definition for \get_xyw_liuliang:
     from 2021-07-02 06:00 on, run D:\xyw2.0\start.vbs and repeat every 6 hours. -->
<Task version="1.2" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task">
  <RegistrationInfo>
    <Date>2021-07-02T00:36:45.5624986</Date>
    <Author>xxxxx</Author>
    <URI>\get_xyw_liuliang</URI>
  </RegistrationInfo>
  <Triggers>
    <CalendarTrigger>
      <Repetition>
        <!-- PT6H is an ISO 8601 duration: repeat every 6 hours. -->
        <Interval>PT6H</Interval>
        <StopAtDurationEnd>false</StopAtDurationEnd>
      </Repetition>
      <!-- First activation: 06:00 local time. -->
      <StartBoundary>2021-07-02T06:00:00</StartBoundary>
      <Enabled>true</Enabled>
      <ScheduleByDay>
        <!-- Trigger every day. -->
        <DaysInterval>1</DaysInterval>
      </ScheduleByDay>
    </CalendarTrigger>
  </Triggers>
  <Principals>
    <Principal id="Author">
      <!-- Account (given as a SID) the task runs under; machine-specific. -->
      <UserId>S-1-5-21-1310891779-4248587301-955746212-1001</UserId>
      <LogonType>InteractiveToken</LogonType>
      <RunLevel>LeastPrivilege</RunLevel>
    </Principal>
  </Principals>
  <Settings>
    <!-- Do not start a second instance while one is still running. -->
    <MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>
    <DisallowStartIfOnBatteries>false</DisallowStartIfOnBatteries>
    <StopIfGoingOnBatteries>true</StopIfGoingOnBatteries>
    <AllowHardTerminate>false</AllowHardTerminate>
    <StartWhenAvailable>false</StartWhenAvailable>
    <RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable>
    <IdleSettings>
      <StopOnIdleEnd>true</StopOnIdleEnd>
      <RestartOnIdle>false</RestartOnIdle>
    </IdleSettings>
    <AllowStartOnDemand>true</AllowStartOnDemand>
    <Enabled>true</Enabled>
    <Hidden>false</Hidden>
    <RunOnlyIfIdle>false</RunOnlyIfIdle>
    <WakeToRun>false</WakeToRun>
    <!-- PT0S: zero duration, which Task Scheduler treats as "no time limit". -->
    <ExecutionTimeLimit>PT0S</ExecutionTimeLimit>
    <Priority>7</Priority>
  </Settings>
  <Actions Context="Author">
    <Exec>
      <!-- The only action: run the VBScript launcher. -->
      <Command>D:\xyw2.0\start.vbs</Command>
    </Exec>
  </Actions>
</Task>

start.vbs

' Launcher used by the scheduled task: runs start.bat through WScript.Shell.
' The second argument of Run (0) is the window style "hidden", so no console
' window appears. Run is called without the wait flag, so it does not wait
' for the batch file to finish.
set shell=wscript.createObject("wscript.shell")  
run=shell.Run("D:\xyw2.0\start.bat", 0)

start.bat

:: Runs p_mail.py from the xyw2.0 folder on drive D:.
:: NOTE(review): "cd xyw2.0" is relative to the current directory of drive D:,
:: so this assumes that directory is the drive root - confirm, or use
:: "cd /d D:\xyw2.0".
d:
cd xyw2.0
:: Clear http_proxy for this session (presumably so the request to the campus
:: gateway is not sent through a proxy - confirm).
set http_proxy=
:: start /b runs the command without opening a new window.
:: NOTE(review): cmd /k keeps that cmd instance open after python exits;
:: cmd /c would let it terminate - confirm which is intended.
start /b cmd /k  "python p_mail.py"

p_mail.py

import requests
import os
import re
import time
import email.mime.multipart
import email.mime.text
import smtplib


# --- Fetch the gateway page and extract this month's usage -----------------

# Without a timeout a stalled gateway would hang this scheduled run forever.
r = requests.get(url='http://10.0.108.3', timeout=10)

# Keep a copy of the fetched page on disk. `with` guarantees the handle is
# closed before the file is reopened for reading.
with open('get.html', 'w+') as html:
    print(r.text, file=html)

# Read forward so that `line` ends up holding the 25th line of the page,
# which is where this version expects the flow counter.
with open('get.html', 'r') as html:
    for _ in range(25):
        line = html.readline()

# Capture the text between flow=' and ';f on that line.
zz = re.findall(r"flow=\'(.*)\';f", line)
if not zz:
    # Fail with a readable message instead of a bare IndexError.
    raise SystemExit("flow value not found on line 25 of get.html")
ti_qu_de_jie_guo = zz[0]
# The raw counter is divided by 1024 twice and reported as GB.
jieguo = int(ti_qu_de_jie_guo)/1024/1024

# --- Send the e-mail --------------------------------------------------------
title = '宿舍本月已用流量:' + str(round(jieguo, 2)) + 'GB'
content = '截至' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ',宿舍已用流量' + str(round(jieguo, 2)) + 'GB'
ret = True  # set to False below when sending fails
FROM_MAIL = "发件邮箱"
TO_MAIL = "收件邮箱"
SMTP_SERVER = 'smtp.163.com'
SSL_PORT = '465'
USER_NAME = FROM_MAIL
USER_PWD = "发件邮箱的key值"
msg = email.mime.multipart.MIMEMultipart()
msg['from'] = FROM_MAIL
msg['to'] = TO_MAIL
msg['subject'] = title
txt = email.mime.text.MIMEText(content)
msg.attach(txt)
try:
    # Implicit-SSL connection. The context manager sends QUIT and closes the
    # socket even when login or sendmail raises.
    with smtplib.SMTP_SSL(SMTP_SERVER, SSL_PORT) as smtp:
        smtp.ehlo()  # identify ourselves to the server
        smtp.login(USER_NAME, USER_PWD)  # sender account and its key/password
        smtp.sendmail(FROM_MAIL, TO_MAIL, str(msg))  # sender, recipient, message
except Exception as e:
    ret = False
    # str(e) is required: concatenating the exception object itself raises
    # TypeError and would hide the real error.
    print(">>>>>>>:" + str(e))

7.2更新:支持通过Siri查询

xywmail.php

<?php
// HTTP entry point: every request to this page launches startbyphp.bat.
// No input from the request is used and there is no access check.
// The response is just a <title> element reading "success" (presumably what
// the Siri shortcut displays - confirm).
echo '<title>success</title>';
// Run the batch file through cmd.exe; START /b starts it without opening a
// new window.
exec('c:\WINDOWS\system32\cmd.exe /c START /b D:\xyw2.0\startbyphp.bat');
//exit('success');
?>

startbyphp.bat

:: Started by xywmail.php: runs p_mail.py from the xyw2.0 folder.
@echo off
:: Go to the root of the current drive, then into xyw2.0.
:: NOTE(review): no drive switch is done here, so this assumes the current
:: drive is already D: when PHP starts the script - confirm.
cd /
cd xyw2.0
:: Clear http_proxy for this session (presumably so the request to the campus
:: gateway is not sent through a proxy - confirm).
set http_proxy=
:: start /b runs the command without opening a new window.
:: NOTE(review): cmd /k keeps that cmd instance open after python exits.
start /b cmd /k  "python p_mail.py"
exit

快捷指令:

机房调试(Python 环境变量异常,bat 无法直接调用 python)

xywmail.php

<?php
// HTTP entry point (lab machine variant): every request to this page launches
// C:\startup\xyw2.0\startbyphp.bat. No input from the request is used and
// there is no access check.
echo '<title>success</title>';
// Run the batch file through cmd.exe; START /b starts it without opening a
// new window.
exec('c:\WINDOWS\system32\cmd.exe /c START /b C:\startup\xyw2.0\startbyphp.bat');
//exit('success');
?>

startbyphp.bat

:: Started by xywmail.php on the lab machine.
@echo off
:: Change into the Python 3.7 install folder so that "python" resolves from
:: the current directory (the heading above says the PATH setup on this
:: machine is broken). Side effect: p_mail.py opens get.html by relative
:: path, so that file is created in this folder.
cd \Users\Administrator\AppData\Local\Programs\Python\Python37
:: Clear http_proxy for this session (presumably so the request to the campus
:: gateway is not sent through a proxy - confirm).
set http_proxy=
:: The script is addressed by a drive-relative absolute path.
:: NOTE(review): cmd /k keeps that cmd instance open after python exits.
start /b cmd /k "python \startup\xyw2.0\p_mail.py"
exit

start.vbs

' Launcher used by the scheduled task on the lab machine: runs start.bat
' through WScript.Shell. The second argument of Run (0) is the window style
' "hidden", so no console window appears.
set shell=wscript.createObject("wscript.shell")  
run=shell.Run("C:\startup\xyw2.0\start.bat", 0)

start.bat

:: Lab machine variant: run p_mail.py from \startup\xyw2.0.
:: NOTE(review): the cd target is written with forward slashes and without a
:: drive letter, so this assumes the current drive is C: and that cmd accepts
:: this spelling - confirm, or use "cd /d C:\startup\xyw2.0".
cd /startup/xyw2.0
:: Clear http_proxy for this session (presumably so the request to the campus
:: gateway is not sent through a proxy - confirm).
set http_proxy=
:: NOTE(review): cmd /k keeps that cmd instance open after python exits.
start /b cmd /k  "python \startup\xyw2.0\p_mail.py"

get_xyw_liuliang.xml

<?xml version="1.0" encoding="UTF-16"?>
<!-- Windows Task Scheduler task definition for \get_xyw_liuliang (lab machine):
     from 2021-07-02 06:00 on, run C:\startup\xyw2.0\start.vbs and repeat every 6 hours. -->
<Task version="1.2" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task">
  <RegistrationInfo>
    <Date>2021-07-02T00:36:45.5624986</Date>
    <Author>xxxxx</Author>
    <URI>\get_xyw_liuliang</URI>
  </RegistrationInfo>
  <Triggers>
    <CalendarTrigger>
      <Repetition>
        <!-- PT6H is an ISO 8601 duration: repeat every 6 hours. -->
        <Interval>PT6H</Interval>
        <StopAtDurationEnd>false</StopAtDurationEnd>
      </Repetition>
      <!-- First activation: 06:00 local time. -->
      <StartBoundary>2021-07-02T06:00:00</StartBoundary>
      <Enabled>true</Enabled>
      <ScheduleByDay>
        <!-- Trigger every day. -->
        <DaysInterval>1</DaysInterval>
      </ScheduleByDay>
    </CalendarTrigger>
  </Triggers>
  <Principals>
    <Principal id="Author">
      <!-- Account the task runs under, given as MACHINE\user; machine-specific. -->
      <UserId>WIN-SE858M9ON5Q\Administrator</UserId>
      <LogonType>InteractiveToken</LogonType>
      <RunLevel>LeastPrivilege</RunLevel>
    </Principal>
  </Principals>
  <Settings>
    <!-- Do not start a second instance while one is still running. -->
    <MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>
    <DisallowStartIfOnBatteries>false</DisallowStartIfOnBatteries>
    <StopIfGoingOnBatteries>true</StopIfGoingOnBatteries>
    <AllowHardTerminate>false</AllowHardTerminate>
    <StartWhenAvailable>false</StartWhenAvailable>
    <RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable>
    <IdleSettings>
      <StopOnIdleEnd>true</StopOnIdleEnd>
      <RestartOnIdle>false</RestartOnIdle>
    </IdleSettings>
    <AllowStartOnDemand>true</AllowStartOnDemand>
    <Enabled>true</Enabled>
    <Hidden>false</Hidden>
    <RunOnlyIfIdle>false</RunOnlyIfIdle>
    <WakeToRun>false</WakeToRun>
    <!-- PT0S: zero duration, which Task Scheduler treats as "no time limit". -->
    <ExecutionTimeLimit>PT0S</ExecutionTimeLimit>
    <Priority>7</Priority>
  </Settings>
  <Actions Context="Author">
    <Exec>
      <!-- The only action: run the VBScript launcher. -->
      <Command>C:\startup\xyw2.0\start.vbs</Command>
    </Exec>
  </Actions>
</Task>

搞定:

正则表达式在线测试

https://tool.oschina.net/regex/

参考资料:

https://blog.csdn.net/ztf312/article/details/47259805

https://www.runoob.com/regexp/regexp-syntax.html

https://blog.csdn.net/lcczzu/article/details/46879683

https://blog.csdn.net/Jerry_1126/article/details/85009810

https://zhuanlan.zhihu.com/p/33288426

https://www.jianshu.com/p/ae0078ea55d8

https://www.codenong.com/835941/

https://blog.csdn.net/qq_21808961/article/details/81148913

最后修改:2021 年 07 月 02 日 03 : 14 PM

2 条评论

  1. HMP

    天天追更老哥博客

    1. soarli

发表评论 取消回复