1. python讀取用戶名密碼
# Console login / registration loop.
# Parallel lists: user[i] pairs with password[i].
user = ['xrz', 'wyh']
password = ['123456', '654321']

while True:
    setting = input("請問您需要登陸,注冊,退出?1.登陸,2.注冊,3.退出\n")
    if setting == '1':
        User = input("請輸入您的用戶名:\n")
        # Fix: the original scanned the list with a counter `n` that was
        # never reset between attempts and whose `while n <= size` loop ran
        # one index past the end of the list (IndexError). A membership test
        # plus index() is both correct and clearer.
        if User in user:
            n = user.index(User)
            Password = input("請輸入您的用戶密碼:\n")
            if Password == password[n]:
                print("歡迎", user[n])
            else:
                print("密碼錯誤")
        else:
            print("用戶名不存在!")
    elif setting == '2':
        # Registration: append to both parallel lists so indices stay aligned.
        user.append(input("請輸入你要注冊的用戶名:\n"))
        password.append(input("請輸入你的密碼:\n"))
        print("注冊成功!")
    elif setting == '3':
        break
    else:
        print("請輸入正確的需求!")
2. python爬蟲項目實戰:爬取用戶的所有信息,如性別、年齡等
python爬蟲項目實戰:
爬取糗事網路用戶的所有信息,包括用戶名、性別、年齡、內容等等。
10個步驟實現項目功能,下面開始實例講解:
1.導入模塊
import re
import urllib.request
from bs4 import BeautifulSoup
2.添加頭文件,防止爬取過程被拒絕鏈接
def qiuShi(url, page):
    """Scrape one listing page and print each post's user info.

    Extracts username, post content, "funny" vote count, comment count,
    and gender/age, then prints a formatted record per user.

    url  -- full URL of the listing page to fetch
    page -- 1-based page number, used only in the printed header
    """
    # Step 2: mimic a real browser so the site does not refuse the request.
    # NOTE(review): the Accept and User-Agent values were line-wrapped by the
    # article formatting; they are rejoined here into single strings.
    heads = {
        'Connection': 'keep-alive',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    opener = urllib.request.build_opener()
    opener.addheaders = list(heads.items())
    urllib.request.install_opener(opener)
    data = opener.open(url).read().decode()

    # Step 3: parse the fetched HTML.
    soup = BeautifulSoup(data, 'lxml')

    # Step 4: usernames live in <h2> tags.
    name = [uname.get_text() for uname in soup.find_all('h2')]

    # Step 5: post content is the <span> text inside div.content.
    soup3 = BeautifulSoup(str(soup.find_all('div', class_='content')), 'lxml')
    cont = [span.get_text() for span in soup3.find_all('span')]

    # Step 6: "funny" vote counts (<i class="number"> inside span.stats-vote).
    soup1 = BeautifulSoup(str(soup.find_all('span', class_='stats-vote')), 'lxml')
    happy = [num.get_text() for num in soup1.find_all('i', class_='number')]

    # Step 7: comment counts (<i class="number"> inside a.qiushi_comments).
    soup2 = BeautifulSoup(str(soup.find_all('a', class_='qiushi_comments')), 'lxml')
    comm = [num.get_text() for num in soup2.find_all('i', class_='number')]

    # Step 8: gender and age via regex. Fix: the original pattern lost its
    # backslashes in transcription; restored so it matches markup such as
    # <div class="articleGender manIcon">25</div> -> ('man', '25').
    pattern1 = r'<div class="articleGender (\w*?)Icon">(\d*?)</div>'
    sexages = re.compile(pattern1).findall(data)

    # Step 9: print one formatted record per user. Fix: the original
    # `' ' 17` / `' ' 25` lost the `*` repetition operator.
    print()
    for x, sa in enumerate(sexages):
        print(' ' * 17, '= = 第', page, '頁-第', str(x + 1) + '個用戶 = = ', ' ' * 17)
        print('【用戶名】:', name[x], end='')
        print('【性別】:', sa[0], ' 【年齡】:', sa[1])
        print('【內容】:', cont[x])
        print('【搞笑指數】:', happy[x], ' 【評論數】:', comm[x])
        print(' ' * 25, ' 三八分割線 ', ' ' * 25)
10.設置循環遍歷爬取13頁的用戶信息
# Step 10: crawl listing pages 1 through 13.
for i in range(1, 14):
    # Fix: the original URL literal began with a stray space, which makes
    # urllib reject it as an unknown/invalid URL scheme.
    url = 'https://www.qiushike.com/8hr/page/' + str(i) + '/'
    qiuShi(url, i)
運行結果,部分截圖:
3. python怎麼爬取簡書用戶名
初步的思路
今天在用Scrapy寫代碼的時候,對網頁的結構也有了大致的分析,再加上之前羅羅攀的思路,初步我是通過專題入口
熱門專題
image.png
image.png
專題管理員 (一般粉絲、文章、字數、收獲喜歡、這幾項數據都非常漂亮)
image.png
image.png
以上紅框里的數據項就是我需要爬取的欄位
但是以上的思路存在一點的問題:
存在一些簡書用戶並不是一些熱門專題的管理員,但是其人氣粉絲量也很高,這個思路可能無法將這些用戶爬取下來
進階的思路
熱門專題
專題關注的人
專題關注的人的動態
推薦作者 粉絲信息
image.png
image.png
image.png
優點:
數據大而全,基本包含了99%的用戶(個人猜測,不嚴謹)
缺點:
因為許多用戶不止關注一個專題,而且其中包含了大量的新注冊用戶(數據很多為空),並且也有大量重復數據需要去重
代碼部分:
jianshu.py 還在調試階段,待更新...
# -*- coding: utf-8 -*-
import sys
import json
import requests
import scrapy
import re
from lxml import etree
from scrapy.http import Request
# Python 2-only interpreter setup: `reload` as a builtin and
# `sys.setdefaultencoding` do not exist on Python 3, so this script
# targets Python 2.
reload(sys)
sys.path.append('..')
sys.setdefaultencoding('utf-8')
class jianshu(scrapy.Spider):
    """Spider that walks Jianshu topic (專題) listings to collect topic
    editors and their followers. Python 2 / Scrapy style; still in debug
    per the surrounding article.
    """
    name = 'jianshu'
    # topic_category = ['city']
    topic_category = ['recommend', 'hot', 'city']
    # NOTE(review): this template looks truncated by the article extraction
    # (no scheme/host) — presumably '.../collections?page=%s&order_by=%s';
    # confirm against the original repository before running.
    base_url = 'lections?page=%s&order_by=%s'
    # Session cookies copied from a logged-in browser session (values were
    # partially redacted/garbled in the article).
    cookies={
        'UM_distinctid': '15b89d53a930-02ab95f11ccae2-51462d15-1aeaa0-15b89d53a9489b',
        'CNZZDATA1258679142': '1544557204-1492664886-%7C1493280769',
        '_session_id': '%3D%3D--',
        'remember_user_token':'Q3LjYwODEwNzgiXQ%3D%3D--',
        '_ga': 'GA1.2.2016948485.1492666105',
        '_gid': 'GA1.2.382495.1494550475',
        'Hm_lpvt_': '1494550475',
        'Hm_lvt_': '1494213432,1494213612,1494321303,1494387194'
    }
    # Request headers mimicking Chrome; note the Cookie header duplicates
    # the cookies dict as one raw string.
    headers = {
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept - Language': 'zh - CN, zh;q = 0.8',
        'Connection': 'close',
        'Cookie': 'UM_distinctid=15b89d53a930-02ab95f11ccae2-51462d15-1aeaa0-15b89d53a9489b; CNZZDATA1258679142=1544557204-1492664886-%7C1493280769; remember_user_token=Q3LjYwODEwNzgiXQ%3D%3D--; _ga=GA1.2.2016948485.1492666105; _gid=GA1.2.824702661.1494486429; _gat=1; Hm_lvt_=1494213432,1494213612,1494321303,1494387194; Hm_lpvt_=1494486429; _session_id=%3D%3D--',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36',
        'Host':'www.jianshu.com',
        "X-Requested-With": 'XMLHttpRequest'
    }
def get_total_page(self):
    """Probe each topic category for its total page count.

    Returns a list with one dict per category, e.g.
    [{'total_page': 12, 'category': 'hot'}, ...].
    """
    total_page_list = []
    for order in self.topic_category:
        order = order.decode('utf-8')  # Python 2: bytes -> unicode
        total_page = 100  # hard upper bound on pages probed
        dict = {}  # NOTE(review): shadows the builtin `dict`
        for page in range(1, total_page):
            url = self.base_url % (page, order)
            html = requests.get(url, headers=self.headers).content
            selector = etree.HTML(html)
            #print html
            try:
                # An IndexError here means the listing is empty, i.e. we are
                # past the last page — handled by the except branch below.
                elements = selector.xpath('//*[@id="list-container"]/div[1]/div/h4/a/text()')[0]
                # NOTE(review): `elements is not Exception` is always True,
                # so this always continues when the xpath matched anything;
                # the comparison looks like a transcription artifact.
                if elements is not Exception:
                    continue
            except Exception :
                # Record the last page that still had content.
                dict['total_page'] = page - 1
                dict['category'] = order
                break
        total_page_list.append(dict)
    return total_page_list
def get_topic_info(self):
    """Fetch every topic's name, URL and image id across all categories.

    Returns a list of dicts with keys 'topic_name', 'topic_url', 'img_num'.
    """
    topic_info_list = []
    total_page_list = self.get_total_page()
    base_url = self.base_url
    for dict in total_page_list:  # NOTE(review): shadows the builtin `dict`
        category = dict['category']
        total_page = int(dict['total_page'])
        for page in range(1, total_page + 1):
            url = base_url % (page, category)
            html = requests.get(url, headers=self.headers,cookies=self.cookies).content
            selector = etree.HTML(html)
            # Iterate the children of the list container; each child is one
            # topic card.
            topic_href = selector.xpath('//*[@id="list-container"]')[0]
            for href in topic_href:
                dict = {}
                topic_name = href.xpath('./div/h4/a/text()')[0]
                topic_url = "www.jianshu.com" + href.xpath('./div/h4/a/@href')[0]
                topic_img_url = href.xpath('./div/a/img/@src')[0]
                # The 6th path segment of the image URL doubles as the
                # topic's numeric id, reused by later API calls.
                img_num = topic_img_url.split("/")[5]
                dict['topic_name'] = topic_name
                dict['topic_url'] = topic_url
                #
                dict['img_num'] = img_num
                topic_info_list.append(dict)
    return topic_info_list
def get_topic_admin_info(self):
    """Collect nickname and slug for every topic's editors (管理員).

    Returns a list of dicts with keys 'nickname' and 'slug'.
    """
    topic_admin_info_list = []
    topic_info_list = self.get_topic_info()
    for d in topic_info_list:
        img_num = str(d['img_num'])
        # NOTE(review): this URL template looks truncated by the article
        # extraction (no scheme/host, and `%` has no placeholder to fill,
        # which would raise TypeError) — confirm against the original
        # repository before running.
        base_url = "s/editors_and_subscribers" % img_num
        base_url_response = requests.get(base_url, headers=self.headers, cookies=self.cookies)
        json_data_base = json.loads(base_url_response.text.decode('utf-8'))
        editors_total_pages = json_data_base['editors_total_pages']
        for page in range(1, int(editors_total_pages) + 1):
            if page == 1:
                # Page 1's editors arrive with the first response; no extra
                # request needed.
                editors = json_data_base['editors']
                for editor in editors:
                    dict = {}  # NOTE(review): shadows the builtin `dict`
                    dict['nickname'] = editor['nickname']
                    dict['slug'] = editor['slug']
                    topic_admin_info_list.append(dict)
            else:
                try:
                    # NOTE(review): template also looks truncated ("}/..."),
                    # same caveat as base_url above.
                    url = "}/editors?page={}".format(img_num, page)
                    response = requests.get(url,headers=self.headers,cookies=self.cookies)
                    json_data = json.loads(response.text.decode('utf-8'))
                    editors = json_data['editors']
                    for editor in editors:
                        dict = {}
                        dict['nickname'] = editor['nickname']
                        dict['slug'] = editor['slug']
                        topic_admin_info_list.append(dict)
                except Exception:
                    # Deliberate best-effort: skip pages that fail to fetch
                    # or parse rather than aborting the whole crawl.
                    pass
    return topic_admin_info_list
def get_followers_following_list(self):
# 獲取管理員粉絲列表
followers_list = []
topic_admin_list = self.get_topic_admin_info()
followers_base_url = "s/%s/followers"
for dict in topic_admin_list:
url = followers_base_url % dict['slug']
headers = self.headers
headers['Referer'] = url
headers['DNT'] = '1'
response = requests.get(url, headers=headers, cookies=self.cookies).content
total_followers = re.fi
4. Python設計一個用戶名和密碼
# Minimal username/password check.
# Fixes vs. the article version: the literal word 縮進 ("indent") stood in
# for real indentation, and the reserved keyword `pass` was used as a
# variable name (a SyntaxError) — renamed to `pwd`.
name = input()
if name == '這個地方是你判斷用的用戶名':
    pwd = input()
    if pwd == '這里是你判斷的密碼':
        print('用戶名密碼都對')
    else:
        print('密碼不對')
else:
    print('用戶名不對')
5. python 獲取登錄名getpass.getuser 怎麼處理root
getpass 模塊提供了平台無關的在命令行下輸入密碼的方法. getpass(prompt) 會顯示提示字元串, 關閉鍵盤的屏幕反饋, 然後讀取密碼. 如果提示參數省略, 那麼它將列印出 "Password:". getuser() 獲得當前用戶名, 如果可能的話.
6. python作業:1.設計賬號登錄程序,要求如下:(1)動態獲取用戶名和密碼,格式為"用戶名:密碼"
這個需求沒說清楚用什麼平台,比如是桌面版,還是web版。對所用技術或者庫有沒有什麼限制。