This article is an entry in the “Digitalstar Project”, competing for a creative gift package and the creator incentive prize.

takeaway

Fat friends, a fan recently came to me and bluntly offered to pay me to do his homework. I was dumbfounded on the spot when I heard it. Am I that kind of person?

I asked him straight away how much he was offering, hee hee hee!! Of course, I would not do it for any amount of money, but since he is a fan I will certainly try to help, and that is how today’s blog post began.

Scraping the Chinese university rankings

Link: Chinese university rankings. This is actually quite simple; this fan clearly has not read my previous articles closely, because I have covered this kind of scraping many times. So let’s just do it.

We directly locate the id or class of the page elements that hold the information we want to obtain.

# Download the ranking page and decode the body as UTF-8.
# NOTE(review): `url` and `header` are not defined in this snippet; they are
# assumed to be set earlier in the script.
res = requests.get(url=url, headers=header).content.decode('utf-8')
soup = BeautifulSoup(res, 'lxml')
names = soup.findAll(name="a", attrs={"class": "name-cn"})
xinxi = soup.findAll("td")
# The <td> cells are consumed in groups of 6, one CSV row per group.
with open("Chinese University. CSV", 'a', encoding="utf-8", newline="") as f:
    # One writer for the whole file instead of building a new one per row.
    w = csv.writer(f)
    # Stop at the last complete group so a trailing partial group cannot
    # raise IndexError.
    for i in range(0, len(xinxi) - 5, 6):
        w.writerow([cell.text.strip() for cell in xinxi[i:i + 6]])


Copy the code

And get the information

Visual analysis

The word cloud

We have got the information above. We first do a word cloud analysis on the provinces and categories where these universities are concentrated

You can see that comprehensive universities and Beijing account for the majority.

import jieba
from wordcloud import WordCloud
from matplotlib import colors
import csv
import pandas as pd

# Load only the columns at positions 2 and 3 of the scraped CSV.
# NOTE(review): read_csv treats the first line of the file as a header row;
# pass header=None if the CSV was written without one.
info = pd.read_csv("Chinese University. CSV", usecols=[2, 3])
text = info
# str(DataFrame) is the *display* repr (row labels, and truncated once the
# frame exceeds pandas' display.max_rows), so build the corpus from the actual
# cell values instead.
cell_text = " ".join(info.fillna("").astype(str).to_numpy().ravel())
# Join the jieba tokens with spaces so WordCloud can split them into words;
# joining with "" would simply rebuild the unsegmented string.
cut_text = " ".join(jieba.cut(cell_text))
color_list = ['#FF0000', '#9955FF', '#66FFFF']  # colours used for the words
colormap = colors.ListedColormap(color_list)  # wrap them in a matplotlib colormap
# color_mask = cv2.imread("11.jpg")
word_cloud = WordCloud(
    font_path="msyh.ttc",
    background_color='black',
    mode="RGBA",
    prefer_horizontal=1,
    # mask=color_mask,
    height=200,
    width=200,
    scale=1,
    colormap=colormap,  # set color
    margin=5
)

word_cloud1 = word_cloud.generate(cut_text)
word_cloud1.to_file('2.png')

print("Image saved successfully")

Copy the code

Bar chart

This chart is made with ECharts; if you are interested, you can build it with Cufflinks and get the same effect.

// Only the js variable part is posted here
option = {
  title: {
    text: 'University Data in China'
  },
  tooltip: {
    trigger: 'axis'
  },
  legend: {
    data: ['total'.'School level']},grid: {
    left: '3%'.right: '4%'.bottom: '3%'.containLabel: true
  },
  toolbox: {
    feature: {
      saveAsImage: {}}}.xAxis: {
    type: 'category'.boundaryGap: false.data: [1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23.24.25.26.27.28.29.30]},yAxis: {
    type: 'value'
  },
  series: [{name: 'total'.type: 'line'.stack: 'Total'.data: [969.2.855.3.768.7.723.4.654.8.649.7.577.0.574.3.567.9.537.9.522.6.519.3.518.3.516.6.513.8.508.3.488.1.487.8.474.0.465.3.447.0.444.3.442.2.435.7.430.5.427.8.419.8.418.2.401.8.400.4] {},name: 'School level'.type: 'line'.stack: 'Total'.data: [37.9.36.1.34.3.35.5.35.1.36.6.40.0.32.1.31.8.34.5.32.7.30.9.34.8.30.7.32.8.33.2.34.3.34.5.32.3.31.5.28.8.32.7.30.8.30.4.32.4.32.7.30.5.30.2.35.2.31.8]]}};Copy the code

Animated geographic coordinates of universities at 211 level and above in China

# Global matplotlib settings for the animated map, applied in one call.
# NOTE(review): assumes `plt` is matplotlib.pyplot, imported elsewhere.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['SimHei'],   # font looked up for the sans-serif family
    'axes.unicode_minus': False,     # draw minus signs with the ASCII hyphen
    'animation.writer': 'html',      # default writer used when saving animations
    'animation.embed_limit': 100,    # raise the size cap for embedded animations
})


def rgba_to_rgb(img_rgba):
    """Composite a four-band image onto a white background and return it as RGB.

    Args:
        img_rgba: Image object exposing ``size``, ``split()`` and usable as a
            paste source; its band at index 3 is used as the paste mask.

    Returns:
        A new "RGB" image of the same size.
    """
    # Start from an opaque white canvas of the same size.
    img_rgb = Image.new("RGB", img_rgba.size, (255, 255, 255))
    # Band 3 of the source acts as the mask when pasting onto the canvas.
    img_rgb.paste(img_rgba, mask=img_rgba.split()[3])
    return img_rgb


def html_to_gif(html_file, gif_file, duration=0.5):
    """Assemble the frame images that sit next to an HTML animation into a GIF.

    The frame directory is derived from ``html_file`` by replacing ".html"
    with "_frames"; every file in it is read in sorted filename order.

    Args:
        html_file: Path of the HTML animation file.
        gif_file: Path of the GIF to write.
        duration: Passed through to ``imageio.mimsave`` as ``duration``.

    Returns:
        ``gif_file``, unchanged.
    """
    path = html_file.replace(".html", "_frames")
    images = [os.path.join(path, x) for x in sorted(os.listdir(path))]
    frames = [imageio.imread(x) for x in images]
    # Frames with 4 channels are flattened onto white before being written.
    if frames[0].shape[-1] == 4:
        frames = [np.array(rgba_to_rgb(Image.fromarray(x))) for x in frames]
    imageio.mimsave(gif_file, frames, 'gif', duration=duration)
    return gif_file

# Colour cycle for the scatter points: 12 hex colours repeated 100 times so
# that plain indexing/slicing works for up to 1200 points.
cmap = [
    '#2E91E5', '#1CA71C', '#DA16FF', '#B68100', '#EB663B', '#00A08B',
    '#FC0080', '#6C7C32', '#862A16', '#620042', '#DA60CA', '#0D2A63',
] * 100

def getCoords(geom):
    """Return the coordinate arrays of a polygon or line geometry.

    Args:
        geom: A ``geo.Polygon``, ``geo.MultiPolygon``, ``geo.LineString`` or
            ``geo.MultiLineString``.

    Returns:
        A list of NumPy arrays: one per exterior ring (polygons) or one per
        line (line strings). Interior rings of polygons are not included.

    Raises:
        TypeError: If ``geom`` is none of the supported geometry types.
    """
    # Coordinates are read through `.coords` rather than np.array(geometry):
    # converting a geometry object directly depends on the array interface,
    # which Shapely 2.0 no longer provides.
    if isinstance(geom, geo.MultiPolygon):
        return [np.array(g.exterior.coords) for g in geom.geoms]
    elif isinstance(geom, geo.Polygon):
        return [np.array(geom.exterior.coords)]
    elif isinstance(geom, geo.LineString):
        return [np.array(geom.coords)]
    elif isinstance(geom, geo.MultiLineString):
        return [np.array(x.coords) for x in geom.geoms]
    else:
        raise TypeError("geom must be one of [polygon,MultiPolygon,LineString,MultiLineString]!")


# Base map layers: each file is tagged as EPSG:4326 and then reprojected to
# EPSG:2343.
dfprovince = gpd.read_file("dfprovince.geojson")
dfprovince = dfprovince.set_crs("epsg:4326").to_crs("epsg:2343")
dfnanhai = gpd.read_file("dfnanhai.geojson")
dfnanhai = dfnanhai.set_crs("epsg:4326").to_crs("epsg:2343")
# Keep only the features whose LENGTH lies strictly between 1.0 and 2.0.
dfline9 = dfnanhai.loc[(dfnanhai["LENGTH"] > 1.0) & (dfnanhai["LENGTH"] < 2.0)]

# Scatter layers: the two university point files, stacked row-wise.
df985 = gpd.read_file("985 University of China. Geojson")
df985 = df985.set_crs("epsg:4326").to_crs("epsg:2343")
df211 = gpd.read_file("211 University of China. Geojson")
df211 = df211.set_crs("epsg:4326").to_crs("epsg:2343")
dfpoints = pd.concat([df985, df211], axis=0)

# Plain table of projected x/y per point, a constant weight z, indexed by name.
df = pd.DataFrame(
    {
        "x": [pt.x for pt in dfpoints["geometry"]],
        "y": [pt.y for pt in dfpoints["geometry"]],
        "z": 1.0,
    },
    index=dfpoints["name"].values,
)


def bubble_map_dance(df, title="Location distribution of 116 211 Universities in China",
                     filename=None,
                     figsize=(8, 6), dpi=144,
                     duration=0.5,
                     anotate_points=("Beijing University of Posts and Telecommunications",
                                     "Nanchang University",
                                     "Huazhong Agricultural University",
                                     "Donghua University",
                                     "Yunnan University",
                                     "Shaanxi Normal University",
                                     "Inner Mongolia University",
                                     "Xizang University",
                                     "Xinjiang University",
                                     "Qinghai University",
                                     "Harbin Engineering University")):
    """Animate points appearing one by one on the province base map.

    Every row of ``df`` gets three consecutive frames in which its bubble and
    label shrink (size factors 800/400/100, font sizes 20/15/10); rows that
    were introduced earlier stay on the map as small translucent points.

    Args:
        df: DataFrame with numeric columns "x", "y" and "z"; the index labels
            are used as annotation text. Rows are shown in their stored order.
        title: Text drawn near the top of the main axes.
        filename: If given, the animation is saved there and the name returned.
        figsize: Figure size passed to ``plt.subplots``.
        dpi: Figure resolution passed to ``plt.subplots``.
        duration: Seconds per frame (converted to the millisecond interval).
        anotate_points: Index labels that keep a permanent text label once
            their point has been introduced.
            NOTE(review): these must match the values of ``df.index`` exactly;
            confirm the default names against the "name" column of the data.

    Returns:
        ``filename`` when it is given. Otherwise an ``IPython.display.HTML``
        object embedding the animation, or the ``FuncAnimation`` itself when
        IPython is not installed.
    """
    fig, ax_base = plt.subplots(figsize=figsize, dpi=dpi)
    ax_child = fig.add_axes([0.800, 0.125, 0.10, 0.20])

    # Geometry and extents do not change between frames, so extract them once
    # instead of on every frame.
    province_rings = [ring for geom in dfprovince["geometry"] for ring in getCoords(geom)]
    dash_lines = [ln for geom in dfline9["geometry"] for ln in getCoords(geom)]
    bounds = dfprovince.total_bounds
    x_span = bounds[2] - bounds[0]
    y_span = bounds[3] - bounds[1]
    z_mean = df["z"].mean()

    # (marker size factor, label font size) for the 3 frames of each point.
    highlight_steps = ((800, 20), (400, 15), (100, 10))

    def color_at(j):
        # Cycle through the palette so any number of points gets a colour.
        return cmap[j % len(cmap)]

    def plot_frame(i):
        ax_base.clear()
        ax_child.clear()

        # Draw provincial boundaries; each axes gets its own patch instance.
        # linewidth is 0.8: the garbled source read "8.", far thicker than the
        # 1.5 used for the nine-dash line below.
        for ring in province_rings:
            for ax in (ax_base, ax_child):
                ax.add_patch(plt.Polygon(ring, fill=True, ec="gray", fc="white",
                                         alpha=0.5, linewidth=0.8))

        # Draw a nine-dash line
        for ln in dash_lines:
            x, y = np.transpose(ln)
            for ax in (ax_base, ax_child):
                ax.add_artist(plt.Line2D(x, y, color="gray", linestyle="-.", linewidth=1.5))

        # Set the spine format
        for spine in ['top', 'left', "bottom", "right"]:
            ax_base.spines[spine].set_color("none")
            ax_child.spines[spine].set_alpha(0.5)
        ax_base.axis("off")

        # Set the drawing range
        ax_base.set_xlim(bounds[0] - x_span / 10, bounds[2] + x_span / 10)
        ax_base.set_ylim(bounds[1] + y_span / 3.5, bounds[3] + y_span / 100)

        ax_child.set_xlim(bounds[2] - x_span / 2.5, bounds[2] - x_span / 20)
        ax_child.set_ylim(bounds[1] - y_span / 20, bounds[1] + y_span / 2)

        # Remove the axis scale
        ax_child.set_xticks([])
        ax_child.set_yticks([])

        # Frame i shows point number k (1-based) in highlight step m.
        k = i // 3 + 1
        m = i % 3
        text = "NO." + str(k)

        # Points introduced on earlier frames.
        dftmp = df.iloc[:k - 1, :]

        # Draw scatter image
        if len(dftmp) > 0:
            colors_so_far = [color_at(j) for j in range(len(dftmp))]
            sizes = 100 * dftmp["z"] / z_mean
            ax_base.scatter(dftmp["x"], dftmp["y"], s=sizes,
                            c=colors_so_far, alpha=0.3, zorder=3)
            ax_child.scatter(dftmp["x"], dftmp["y"], s=sizes,
                             c=colors_so_far, alpha=0.3, zorder=3)

            # Add comment text
            for rank, (p, px, py) in enumerate(zip(dftmp.index, dftmp["x"], dftmp["y"])):
                if p in anotate_points:
                    ax_base.annotate(p, xy=(px, py), xycoords="data", xytext=(-15, 10),
                                     fontsize=10, fontweight="bold", color=color_at(rank),
                                     textcoords="offset points")

        # Add a title and rank number
        ax_base.text(0.5, 0.95, title, va="center", ha="center",
                     size=12, transform=ax_base.transAxes)
        ax_base.text(0.5, 0.5, text, va="center", ha="center",
                     alpha=0.3, size=50, transform=ax_base.transAxes)

        # Add attention animation: the newest point shrinks over its 3 frames.
        # Positional access (iloc) is used because the index holds labels.
        size_factor, font_size = highlight_steps[m]
        px, py, pz = df.iloc[k - 1][["x", "y", "z"]]
        p = df.index[k - 1]
        ax_base.scatter(px, py, s=size_factor * pz / z_mean,
                        c=[color_at(k - 1)], alpha=0.5, zorder=4)
        ax_base.annotate(p, xy=(px, py), xycoords="data",
                         xytext=(-15, 10), fontsize=font_size, fontweight="bold",
                         color=color_at(k - 1), textcoords="offset points", zorder=5)

    my_animation = animation.FuncAnimation(fig, plot_frame, frames=range(0, 3 * len(df)),
                                           interval=int(duration * 1000))

    if filename is not None:
        my_animation.save(filename)
        return filename

    try:
        from IPython.display import HTML
    except ImportError:
        # Without IPython there is nothing to embed into; hand back the
        # animation so the caller can still save or show it.
        return my_animation
    # Render the JS/HTML representation once.
    return HTML(my_animation.to_jshtml())
Copy the code

A final word to my fat friends

As for this fan’s request, I think this is roughly what a real paid job would look like. I had not written Python for a few days, so this one gave me quite a workout. Follow me for Java and Python web content. Finally, the same line as always: let’s “fat-learn” together, and keep going!