Python crawler (XPath): get the current price and market value of a stock

Home Page > Game > Content 2021-07-04

Topic: a worked example of looking up a stock

For example, we want to use Baidu search to query the current price and market value of a stock.

Here we query the current price and market value of the stock "600754".

The implementation steps are as follows:

1. Import the requests library; the lxml library is also needed (it is used later)

# HTTP client used to send the search request (the call below passes
# headers=, params= and cookies=, which is the requests API, not the re module).
import requests

# lxml provides etree.HTML() and XPath queries for parsing the response.
# If it is missing, install it with: pip install lxml
from lxml import etree

2. Construct the request data. A Baidu search request carries quite a lot of data:

# Cookies sent with the request; hard-coded values as published in the article.
# NOTE(review): BDUSS looks like a login/session token -- presumably it should
# not be committed to source control; confirm and load it from configuration.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpcWtabHRlSVFBACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# Request headers; hard-coded values as published in the article.
headers = {
    'is_xhr': '1',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    # NOTE(review): only the tail of the Referer URL survived in the published
    # article (scheme, host and leading query string are missing) -- restore
    # the full URL if the request is rejected.
    'Referer': '%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    # NOTE(review): empty in the published article; the original URL value
    # appears to have been stripped.
    'is_referer': '',
}

# Query-string parameters as a tuple of (name, value) pairs.
# 'wd' carries the search keyword, here the stock code 600754.
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9Iex/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

3. Send the request and convert the result to text (the returned data is parsed in the next step, so we need its text content):

# Send the GET request and keep only the decoded body text for parsing.
# Requires `import requests`; `re` has no get() function.
# NOTE(review): the URL literal was empty in the published article; Baidu's
# search endpoint is assumed here from the query parameters (wd, tn, ...) --
# confirm before relying on it.
response = requests.get(
    'https://www.baidu.com/s',
    headers=headers,
    params=params,
    cookies=cookies,
).text

4. Parse the result (XPath is used to extract the data; its usage is explained in other articles on this blog):

# Parse the response text into an element tree that supports XPath queries.
html = etree.HTML(response)

# Text of the <span> whose class attribute is exactly this string.
# The class value must not contain extra spaces or the match fails.
a = html.xpath('//span[@class="op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
print('Current price:', a[0])  # Current price

# Text of the 2nd <span> inside the 8th <li> of the info list.
b = html.xpath('//ul[@class="op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
print('Current market value:', b[0])  # Current market value

For a quick way to obtain the request header information, please refer to this article:

All code

# This script queries Baidu for a given stock code and prints the stock's
# current price and current market value.

# HTTP client used to send the search request.
import requests

# lxml provides etree.HTML() and XPath queries for parsing the response.
# If it is missing, install it with: pip install lxml
from lxml import etree

# Cookies sent with the request; hard-coded values as published in the article.
# NOTE(review): BDUSS looks like a login/session token -- presumably it should
# not be committed to source control; confirm and load it from configuration.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpcWtabHRlSVFBACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# Request headers; hard-coded values as published in the article.
headers = {
    'is_xhr': '1',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    # NOTE(review): only the tail of the Referer URL survived in the published
    # article (scheme, host and leading query string are missing) -- restore
    # the full URL if the request is rejected.
    'Referer': '%2FjeON9Iex%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    # NOTE(review): empty in the published article; the original URL value
    # appears to have been stripped.
    'is_referer': '',
}

# Query-string parameters as a tuple of (name, value) pairs.
# 'wd' carries the search keyword, here the stock code 600754.
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9Iex/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

# Send the GET request and keep only the decoded body text for parsing.
# NOTE(review): the URL literal was empty in the published article; Baidu's
# search endpoint is assumed here from the query parameters (wd, tn, ...) --
# confirm before relying on it.
response = requests.get(
    'https://www.baidu.com/s',
    headers=headers,
    params=params,
    cookies=cookies,
).text

# Parse the response text into an element tree that supports XPath queries.
html = etree.HTML(response)

# Text of the <span> whose class attribute is exactly this string.
a = html.xpath('//span[@class="op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
print('Current price:', a[0])  # Current price

# Text of the 2nd <span> inside the 8th <li> of the info list.
b = html.xpath('//ul[@class="op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
print('Current market value:', b[0])  # Current market value

Tags: [python] [python crawler] [xpath] [lxml]

Extended reading

Same topic

recommend

Popular