NumPy基础
NumPy几个重要定义
axis:维度
rank:维度数
shape:各维度的元素数
size:数组内所有元素的数量
创建数组
# 1. Build an array from explicit nested lists (shape (2, 4)).
np_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# 2. Array of zeros.  NOTE: np.zeros((5)) is the same as np.zeros(5) and gives
#    a 1-D array of shape (5,); use np.zeros((1, 5)) for a real 1x5 matrix.
zero_array = np.zeros(5)

# 3. 4x4 array filled with ones.
one_array = np.ones((4, 4))

# 4. 3x3 array filled with one constant value.
full_array = np.full((3, 3), 7)

# 5. Uninitialised 2x3 array: the values are whatever happened to be in memory.
empty_array = np.empty((2, 3))

# 6. Change the shape of an array (same data, new layout).
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.reshape(x, (3, 2))
7、将数组读取改为一维,例如 z = np.ravel(x)
注意:以上两种方法返回的只是原数组的视图(view),并没有复制出新的数组;如果修改 y、z 的内容,x 的内容也会相应改变。
8、复制数组到新的数组,例如 w = np.copy(x)(修改 w 不会影响 x)
# 9. Uniform random floats in the half-open interval [0, 1), as a 5x5 array.
random_array = np.random.random((5, 5))

# 10. Random integers in [0, 100), as a 5x2 array.
rand1 = np.random.randint(100, size=(5, 2))

# 11. 100 samples drawn from the standard normal distribution.
rand3 = np.random.randn(100)
12、生成4阶(4×4)单位矩阵,例如 np.eye(4)
13、生成1-5的等差数列,例如 np.arange(1, 6)(arange 不包含终点)
# 14. Arithmetic sequence with an explicit step (the stop value 5 is excluded).
array = np.arange(1, 5, 0.5)

# 15. Ten evenly spaced points covering [0, 5] (both end points included).
array = np.linspace(0, 5, 10)


# 16. Build an array by evaluating a function at every index.
def my_function(z, y, x):
    """Return ``x * y + z``.

    np.fromfunction passes one index grid per axis, in axis order, so ``z``
    receives the axis-0 indices, ``y`` axis 1 and ``x`` axis 2.
    """
    return x * y + z


array = np.fromfunction(my_function, (3, 2, 10))
索引
# 1. Indexing a two-dimensional array.
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(matrix1)
print(matrix1[0, 2])                   # single element: row 0, column 2
matrix1[0, 1] = 100                    # assignment through an index
print(matrix1)
print(matrix1[[0, 2, 2], [0, 1, 0]])   # fancy indexing: elements (0,0), (2,1), (2,0)
print(matrix1[(0, 2), 2:5])            # rows 0 and 2; the slice 2:5 is clipped to column 2
# 2. Indexing higher-dimensional arrays; "..." stands for "all remaining axes".
matrix_3d = np.arange(80).reshape(4, 5, 4)
print(matrix_3d)
print(matrix_3d[2, ...])      # same as matrix_3d[2, :, :]
print(matrix_3d[0, 2, ...])   # same as matrix_3d[0, 2, :]
print(matrix_3d[3, ..., 2])   # same as matrix_3d[3, :, 2]
print(matrix_3d[3, :, 2])
# 3. Boolean indexing.
matrix3 = np.array([[1, 2], [3, 4], [5, 6]])
print(matrix3)
bool_idx = matrix3 > 2            # element-wise mask with the same shape as matrix3
print(bool_idx)
print(matrix3[bool_idx])          # 1-D array of the elements where the mask is True
print(matrix3 % 2 == 1)

matrix4 = np.arange(36).reshape(3, 12)
rows_on = [True, False, True]
cols_on = [True, False, True, False, True, False,
           True, False, True, False, True, False]
# np.ix_ combines a row mask and a column mask into an open mesh, selecting the
# sub-matrix of enabled rows x enabled columns.
print(matrix4[np.ix_(rows_on, cols_on)])
# 4. Iterating over array elements.
two_dimensional_list = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# Iterating an array walks its first axis, i.e. yields one row at a time.
for item in two_dimensional_list:
    print(item)
for row in two_dimensional_list:
    print(row)
# Iterate the transpose to walk the columns instead.
for column in two_dimensional_list.T:
    print(column)

three_dimensional_list = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                                   [[10, 11, 12], [13, 14, 15], [16, 17, 18]]])
# .flat yields every scalar element regardless of the number of dimensions.
for item in three_dimensional_list.flat:
    print(item)
# Plain iteration yields the 2-D slices along the first axis.
for item in three_dimensional_list:
    print(item)
数学函数、统计、线性代数
1、Arithmetic operations:+ - * / // % **
# Element-wise arithmetic; each ufunc is the function form of the operator noted.
matrix1 = np.array([[1, 2, 3], [4, 5, 6]])
matrix2 = np.array([[4, 5, 6], [7, 8, 9]])
print(np.add(matrix1, matrix2))            # matrix1 + matrix2
print(np.subtract(matrix1, matrix2))       # matrix1 - matrix2
print(np.multiply(matrix1, matrix2))       # matrix1 * matrix2
print(np.floor_divide(matrix1, matrix2))   # matrix1 // matrix2
print(np.power(matrix1, matrix2))          # matrix1 ** matrix2
print(np.mod(matrix1, matrix2))            # matrix1 % matrix2
2、function
# A tour of common element-wise and cumulative functions.
rand_matrix = np.array([[20, 15.4, -12.8, -1.1, -8.8],
                        [-18.3, -19.6, 20.2, -15.5, 43.1]])
print(np.isnan(rand_matrix))     # mask of NaN entries
print(np.sign(rand_matrix))      # -1, 0 or 1 per element
print(np.nonzero(rand_matrix))   # indices of the non-zero elements
print(np.ceil(rand_matrix))      # round up
print(np.cumsum(rand_matrix))    # running total over the flattened array
print(np.diff(rand_matrix))      # differences between neighbours along the last axis
print(np.sqrt(rand_matrix))      # negative inputs give nan plus a RuntimeWarning
print(np.square(rand_matrix))
print(np.sin(rand_matrix))
print(np.sort(rand_matrix))      # sorts each row (last axis)
3、统计
# Basic statistics on a 2x5 matrix holding -5 .. 4.
matrix = np.arange(-5, 5).reshape(2, 5)
print(matrix.mean())
print(np.median(matrix))
print(np.argmax(matrix))          # index of the maximum in the flattened array
print(np.argmin(matrix))          # index of the minimum in the flattened array
print(np.sum(matrix))
print(np.abs(matrix))
print(np.max(matrix))
print(np.sum(matrix, axis=0))     # column sums
print(np.sum(matrix, axis=1))     # row sums
4、线性代数
# Transpose, matrix product, inverse, determinant and eigen-decomposition.
matrix1 = np.arange(16).reshape(4, 4)
matrix2 = np.arange(8).reshape(2, 4)
print(matrix1.T)
print(matrix1.dot(matrix2.T))      # (4, 4) @ (4, 2) -> (4, 2)

# matrix1 is singular (its rows are linearly dependent), so it has no true
# inverse: depending on rounding, inv() either raises LinAlgError or returns
# meaningless huge values.  Guard the call so the rest of the example still runs.
try:
    print(np.linalg.inv(matrix1))
except np.linalg.LinAlgError as exc:
    print(f"matrix1 is not invertible: {exc}")
print(np.linalg.det(matrix1))      # ~0 for a singular matrix

eigenvalues, eigenvectors = np.linalg.eig(matrix1)
print(eigenvalues)
print(eigenvectors)                # column i is the eigenvector for eigenvalues[i]
5、求解线性方程
# Solve the linear system
#     2x + 6y =  6
#     5x + 3y = -9
coeffs = np.array([[2, 6], [5, 3]])
depvars = np.array([6, -9])
solution = np.linalg.solve(coeffs, depvars)
print(solution)
合并、分割、广播
# 1. Swapping axes.
m1 = np.ones((1, 2, 3))
print(np.transpose(m1, (1, 0, 2)))   # explicit axis order -> shape (2, 1, 3)
m2 = np.ones((2, 3, 4, 5))
print(np.transpose(m2))              # no order given: axes are reversed -> (5, 4, 3, 2)
m3 = m2.swapaxes(1, 2)               # exchange axes 1 and 2 -> (2, 4, 3, 5)
# 2. Joining arrays.
array1 = np.full((1, 5), 1.0)
array2 = np.full((1, 5), 2.0)

stake_array = np.vstack((array1, array2))                # stack rows      -> (2, 5)
print(stake_array)
stake_array = np.hstack((array1, array2))                # stack columns   -> (1, 10)
print(stake_array)
stake_array = np.concatenate((array1, array2), axis=0)   # same as vstack
print(stake_array)
stake_array = np.concatenate((array1, array2), axis=1)   # same as hstack
print(stake_array)
stake_array = np.stack((array1, array2))                 # new leading axis -> (2, 1, 5)
print(stake_array)
# 3. Splitting and tiling.
big_matrix = np.arange(36).reshape(9, 4)
m1, m2, m3 = np.vsplit(big_matrix, 3)   # three (3, 4) blocks of rows
m4, m5 = np.hsplit(big_matrix, 2)       # two (9, 2) blocks of columns

m1 = np.arange(4)
m2 = np.tile(m1, (4, 1))                # repeat m1 as 4 rows -> (4, 4)
4、广播
广播的前提:两个数组的形状必须能够转换成完全相同的形状,才能进行逐元素计算。
- 规则1、如果两个数组的维度数不同,那么维度数较少的数组的形状会在最左边补1。
- 规则2、如果两个数组的形状在某个维度上不匹配,且其中一个数组在该维度上的大小为1,那么该数组会沿着这个维度扩展,以匹配另一个数组的形状。
- 规则3、如果两个数组在某个维度上不匹配,且两者在该维度上的大小都不为1,则抛出异常。
# Broadcasting examples.
m2 = np.arange(6).reshape(2, 3)
print(m2 + [[10], [20]])     # (2, 3) + (2, 1): the column is stretched across columns
print(m2 + [10, 20, 30])     # (2, 3) + (3,):   the row is stretched down the rows
print(m2 + 10)               # a scalar broadcasts to every element

m3 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
m4 = np.array([1, 0, 1])
print(m3 + m4)               # (4, 3) + (3,): m4 is added to every row
Pandas基础
Series和DataFrame
# 1. Series
random_items = np.random.randint(25, size=10)
series_data = pd.Series(random_items)            # default integer index 0..9
print(series_data[0])

letter_index = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
new_series = pd.Series(random_items, index=letter_index)   # custom labels
print(new_series)

data_dict = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
dict_series = pd.Series(data_dict)               # dict keys become the index
print(dict_series)
dict_series = pd.Series(data_dict, name='series_name')
print(dict_series)
dict_series = dict_series.rename('new_name')     # a scalar argument renames the Series itself
print(dict_series)
print(dict_series.median())
print(dict_series > dict_series.median())        # element-wise comparison -> boolean Series
# 2. DataFrame
d = {'one': [1, 2, 3, 4], 'two': [4, 3, 2, 1]}
dict_df = pd.DataFrame(d)            # dict of equal-length lists -> one column per key
print(dict_df)
print(dict_df['one'])
print(dict_df.shape)

# A dict of Series is aligned on the union of the indexes; 'one' has no row
# 'd', so that cell becomes NaN.
d_data = {
    'one': pd.Series([1, 2, 3], name='col_one', index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], name='col_two', index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(d_data)
print(df)

new_df = df.reset_index(drop=True)   # returns a copy; df itself is unchanged
print(new_df)
print(df)
df.reset_index(drop=True, inplace=True)    # discard the labels in place
df.reset_index(drop=False, inplace=True)   # keep the old index as an 'index' column
# 3. Indexing a large table.
# Raw string: "\D" and "\c" are invalid escape sequences in a normal literal.
country_info = pd.read_csv(r'D:\Downloads\country.csv')
print(country_info.columns)
print(country_info.head(3))
print(country_info.head())           # first 5 rows by default
print(country_info.tail())           # last 5 rows by default
print(country_info['Region'])                  # one column -> Series
print(country_info[['Region', 'Country']])     # list of columns -> DataFrame

# Position-based selection with iloc.
print(country_info.iloc[100])
print(country_info.iloc[[2, 3]])
print(country_info.iloc[[2, 3], [0, 3]])

# Label-based selection with loc, after making the country name the index.
country_info['Country'] = country_info['Country'].str.strip()
country_info.set_index(['Country'], drop=True, inplace=True)
print(country_info.loc['China'])
print(country_info.loc[['China', 'India']])
print(country_info.loc[['China', 'India'], ['Region', 'Population']])
print(country_info.loc['China':'India', 'Region':'Deathrate'])   # label slices include both ends
使用read_csv获取country数据,读取前十行的数据,只选取Country、Birthrate和Service,将其中的数据变成DataFrame,并使用to_csv函数将结果存到本地的country.csv文件中。
# Exercise: read the country data, keep the first ten rows and only the
# Country / Birthrate / Service columns, and save the result as a CSV file.
country_info = pd.read_csv(r'D:\Downloads\country.csv')
top_ten = country_info.head(10)
print(top_ten)

# The selection must be taken from the ten-row frame, not from the full table.
df_country = top_ten[['Country', 'Birthrate', 'Service']].copy()
print(df_country)

# Write to a country.csv in the working directory rather than back onto the
# source file: overwriting D:\Downloads\country.csv would strip the columns
# that the other examples reading that file still need.
df_country.to_csv('country.csv', index=False)
Filtering
# Raw string: "\D" and "\c" are invalid escape sequences in a normal literal.
country_info = pd.read_csv(r"D:\Downloads\country.csv")
# Names may carry stray whitespace in the CSV; strip it so exact matches
# such as isin() below behave as expected.
country_info['Country'] = country_info['Country'].str.strip()

# NOTE(review): the comparisons against '0', '100' and '1000' are *string*
# comparisons (lexicographic) -- presumably these columns are read as text;
# convert them to numbers first if a numeric comparison is intended.
print(country_info['Net migration'] == '0')
zero_migration_filter = (country_info['Net migration'] == '0')
print(country_info[zero_migration_filter])
print(country_info.loc[zero_migration_filter, ['Region', 'Country', 'Net migration']])

low_migration_filter = (country_info['Net migration'] < '100')
print(country_info.loc[low_migration_filter, ['Region', 'Population']])

# Combine masks with & (and), | (or) and ~ (not); each operand needs parentheses.
and_filter = (country_info['Deathrate'] > '1000') & (country_info['Population'] < 10000)
print(country_info.loc[and_filter, ['Region', 'Population']])
or_filter = (country_info['Deathrate'] > '1000') | (country_info['Population'] < 10000)
print(country_info.loc[or_filter, ['Region', 'Population']])
population_filter = (country_info['Population'] > 100000)
print(country_info.loc[~population_filter, ['Region', 'Population']])

# Membership test against a list of values.
countries = ['China', 'Japan', 'United States', 'Russia', 'India']
infilter = country_info['Country'].isin(countries)
print(country_info.loc[infilter, ['Region', 'Population']])

# String matching; the pattern passed to str.contains is a regular expression.
str_filter = country_info['Country'].str.contains("A")
print(country_info.loc[~str_filter, ['Region', 'Population']])
str_filter2 = country_info['Country'].str.contains("A|Z")
print(country_info.loc[str_filter2, ['Region', 'Population']])
str_filter3 = country_info['Country'].str.contains("[a-m]")
print(country_info.loc[str_filter3, ['Region', 'Population']])
排序和增删查改
# 1. Sorting and top-N lookups.
# Raw string: "\D" and "\s" are invalid escape sequences in a normal literal.
survey_df = pd.read_csv(r"D:\Downloads\small_survey_results.csv")
survey_df.set_index('Respondent', inplace=True)
print(survey_df.sort_index())
print(survey_df.sort_values(by='Age', ascending=False))

# Sort by several keys, each with its own direction (booleans or 0/1 both work).
print(survey_df.sort_values(by=['Age', 'YearsCode'], ascending=[False, True])[['Age', 'YearsCode']])
print(survey_df.sort_values(by=['Age', 'YearsCode'], ascending=[0, 1])[['Age', 'YearsCode']])
print(survey_df['Age'].sort_values())

# nlargest / nsmallest avoid sorting the whole column.
print(survey_df['ConvertedComp'].nlargest(10))
print(survey_df['ConvertedComp'].nsmallest(10))
richest_users = survey_df.nlargest(10, 'ConvertedComp')
print(richest_users[['ConvertedComp', 'DevType', 'EdLevel']])
2、增删改查
# Raw string: "\D" and "\s" are invalid escape sequences in a normal literal.
survey_df = pd.read_csv(r"D:\Downloads\small_survey_results.csv")

# --- Iterating ---------------------------------------------------------------
for col_name, col_data in survey_df.items():      # (column label, column Series)
    print(col_name)
    print(col_data)
    break                                         # show the first column only
for row_index, row_data in survey_df.iterrows():  # (index label, row Series)
    print(row_index)
    print(row_data)
    break
inversed_df = pd.DataFrame({idx: values for idx, values in survey_df.items()})
print(inversed_df.shape)

# --- Renaming columns --------------------------------------------------------
# NOTE(review): rename() silently ignores keys that are not existing columns;
# confirm the exact spelling of 'Orgsize' against the CSV header.
survey_df.rename(columns={'Age': 'user age', 'Orgsize': 'organization size'}, inplace=True)
print(survey_df.columns)
survey_df.columns = survey_df.columns.str.replace(' ', '_')
print(survey_df.columns)
survey_df.columns = [col.lower() for col in survey_df.columns]
print(survey_df.columns)
print(survey_df['jobfactors'].str.lower())

# --- Updating values ---------------------------------------------------------
survey_df.loc[2, 'trans'] = 'Yes'
print(survey_df.loc[2])
survey_df.loc[2, ['trans', 'user_age', 'country']] = ['Yes', 29, 'China']
print(survey_df.loc[2])
# Assigning through a boolean mask creates 'age_group' on first use.
survey_df.loc[survey_df['user_age'] < 18, ['age_group']] = 'young'
print(survey_df.loc[survey_df['user_age'] < 18])
survey_df.loc[survey_df['user_age'] >= 18, ['age_group']] = 'adult'
print(survey_df.loc[survey_df['user_age'] >= 18])

# --- Adding columns ----------------------------------------------------------
survey_df['gen_col'] = survey_df['gender'] + survey_df['sexuality'] + survey_df['trans']
print(survey_df['gen_col'])
survey_df['job_factors'] = survey_df['jobfactors'].str.split(';')
print(survey_df['job_factors'])
survey_df[['job-fac1', 'job-fac2', 'job-fac3']] = survey_df['jobfactors'].str.split(';', expand=True)
print(survey_df[['job-fac1', 'job-fac2', 'job-fac3']])

# --- Adding rows -------------------------------------------------------------
# Appending never happens in place, so the result has to be assigned back.
# pd.concat is the public replacement for the removed DataFrame.append.
new_row = pd.DataFrame([{'user_age': 30, 'country': 'China'}])
survey_df = pd.concat([survey_df, new_row], ignore_index=True)
print(survey_df)
new_survey = {'user_age': 25, 'country': 'China', 'age1stcode': 24}
new_survey_df = pd.DataFrame(new_survey, index=[0])
print(pd.concat([survey_df, new_survey_df], ignore_index=True, sort=False))

# --- Deleting ----------------------------------------------------------------
# drop(..., inplace=True) returns None, so drop first and print the frame after.
survey_df.drop(columns=['soaccount'], inplace=True)
print(survey_df)
print(survey_df.drop(index=4))
age_filter = survey_df['user_age'] < 50
print(survey_df.drop(index=survey_df[age_filter].index))
3、apply,map,applymap,replace
# Raw string: "\D" and "\s" are invalid escape sequences in a normal literal.
survey_df = pd.read_csv(r"D:\Downloads\small_survey_results.csv")

# Cast every text (object-dtype) column to str so len()/upper() also work on
# missing values, which become the string 'nan'.
dtypes = survey_df.dtypes
str_cols = [col_name for col_name in dtypes.index if dtypes[col_name] == 'object']
survey_df[str_cols] = survey_df[str_cols].astype(str)

# Series.apply calls the function once per element.
print(survey_df['Hobbyist'].apply(len))


def upper_case(col_value):
    """Return *col_value* converted to upper case."""
    return col_value.upper()


print(survey_df['Hobbyist'].apply(upper_case))
survey_df['Hobbyist'] = survey_df['Hobbyist'].apply(lambda x: x.upper())
print(survey_df['Hobbyist'])

# DataFrame.apply calls the function once per row (axis='columns') or per column.
print(survey_df.apply(len, axis='columns'))
print(survey_df.apply(lambda x: x.min()))

# map: values missing from the dict become NaN.
map_dict = {'YES': True, 'NO': False}
print(survey_df['Hobbyist'].map(map_dict))

# applymap: element-wise over a whole DataFrame.
# NOTE: recent pandas releases deprecate applymap in favour of DataFrame.map.
print(survey_df[str_cols].applymap(len))

# replace: values missing from the dict are left untouched.
map_dict = {'YES': True}
print(survey_df['Hobbyist'].replace(map_dict))
聚合、分组、数据清理
1、聚合、分组
# Raw string: "\D" and "\s" are invalid escape sequences in a normal literal.
survey_df = pd.read_csv(r"D:\Downloads\small_survey_results.csv")

# --- Aggregation ---------------------------------------------------------------
print(survey_df['ConvertedComp'].median())
print(survey_df['ConvertedComp'].describe())
print(survey_df['Hobbyist'].value_counts(normalize=True))   # shares instead of counts

# --- Grouping ------------------------------------------------------------------
# Group by a scalar key (not a one-element list) so that get_group() accepts a
# plain label on every pandas version.
country_groups = survey_df.groupby('Country')
print(country_groups.get_group('India'))
print(survey_df.groupby('Country').count())
print(survey_df.groupby('Country').get_group('China')['OpSys'].value_counts())
print(country_groups['OpSys'].value_counts().loc['China'])
print(country_groups['ConvertedComp'].median().loc['Germany'])
print(country_groups['ConvertedComp'].agg(['median', 'mean']))

country_filter = survey_df['Country'] == 'United States'
print(survey_df.loc[country_filter]['LanguageWorkedWith'].str.contains('Python').sum())

# Summing only makes sense for numeric columns; restrict to them explicitly so
# the text columns are neither concatenated nor a source of errors.
country_groups = survey_df.groupby('Country').sum(numeric_only=True)
print(country_groups)

# --- Python users per country --------------------------------------------------
country_groups = survey_df.groupby('Country')
country_users_python = country_groups['LanguageWorkedWith'].apply(
    lambda x: x.str.contains('Python').sum()
)
country_respondents = survey_df['Country'].value_counts()
# Name the two Series explicitly: the name value_counts() gives its result
# differs between pandas versions, so renaming columns afterwards is fragile.
concated_df = pd.concat(
    [country_respondents.rename('NumOfUsers'),
     country_users_python.rename('NumOfPythonUsers')],
    axis='columns',
    sort=False,
)
print(concated_df)
# 2. Data cleaning.
# Raw string: "\D" and "\s" are invalid escape sequences in a normal literal.
survey_df = pd.read_csv(r"D:\Downloads\small_survey_results.csv")
# Work on a copy so the NaN substitution below does not alter survey_df.
small_survey_df = survey_df.copy()

print(small_survey_df.dropna())                          # drop rows with any missing value
print(small_survey_df.dropna(axis='index', how='all'))   # drop rows that are entirely missing
print(small_survey_df.dropna(axis='index', how='all', subset=['CompFreq', 'CompTotal']))

small_survey_df.replace('No', np.nan, inplace=True)      # treat the answer 'No' as missing
print(small_survey_df.isna())
print(small_survey_df.fillna('Missing'))

# Assign the result back instead of calling replace(inplace=True) on the
# selected column, which may act on a temporary copy.
survey_df['YearsCode'] = survey_df['YearsCode'].replace('Less than 1 year', 0)
print(survey_df)
# NOTE(review): astype(float) raises if any other non-numeric answer remains
# in the column -- confirm against the data.
survey_df['YearsCode'] = survey_df['YearsCode'].astype(float)
print(survey_df['YearsCode'].mean())
3、实际应用
# Find the most widely used web framework in each country.
# Each answer is a ';'-separated list; expand it to one framework per cell.
framework_df = survey_df['WebframeWorkedWith'].str.split(';', expand=True)
framework_df.fillna('None', inplace=True)
country_groups = survey_df.groupby('Country')
distinct_frameworks = np.unique(framework_df.values)

# One Series per framework: number of respondents per country who mention it.
framework_sum_array = []
for framework in distinct_frameworks:
    # regex=False: names such as "Vue.js" contain regex metacharacters and
    # must be matched literally.
    new_df = country_groups['WebframeWorkedWith'].apply(
        lambda x: x.str.contains(framework, regex=False).sum()
    )
    new_df.name = framework
    framework_sum_array.append(new_df)

# Name the respondent count explicitly; the name value_counts() gives its
# result differs between pandas versions.
user_count = survey_df['Country'].value_counts().rename('Country')
concated_df = pd.concat([user_count] + framework_sum_array, axis='columns')
print(concated_df)

# Ignore the respondent count and the 'None' placeholder when picking the winner.
most_popular_df = concated_df.drop(columns=['Country', 'None']).idxmax(axis=1)
most_popular_df.name = 'most_popular_framework'
# The column label must match the Series name set above.
final_df = concated_df.join(most_popular_df)[['Country', 'most_popular_framework']]
横向合并和纵向合并
movies = pd.DataFrame({
    'movie_id': [1, 2, 3, 5, 7],
    'title': ['t1', 't2', 't3', 't5', 't7'],
    'description': ['d1', 'd2', 'd3', 'd5', 'd7'],
})
ratings = pd.DataFrame({
    'user_id': [1, 2, 7, 9, 11],
    'movie_id': [1, 2, 3, 5, 6],
    'title': ['t1', 't2', 't3', 't5', 't6'],
    'rating': [2, 3, 1, 5, 4],
    'time': ['t1', 't2', 't4', 't4', 't1'],
})

# --- Horizontal merges (database-style joins) ---------------------------------
print(pd.merge(movies, ratings))                                   # inner join on all shared columns
print(pd.merge(movies, ratings, on=['movie_id', 'title']))         # same keys, stated explicitly
print(pd.merge(movies, ratings, left_on='movie_id', right_on='user_id'))   # differently named keys
print(pd.merge(movies, ratings, left_index=True, right_index=True))        # join on the row index
print(pd.merge(movies, ratings, on=['movie_id'], suffixes=['_left', '_right']))
# Outer join; the extra 'indicator' column records which side each row came from.
print(pd.merge(movies, ratings, on=['movie_id', 'title'], how='outer', indicator='indicator'))
# DataFrame.join matches movies['movie_id'] against the *index* of ratings.
print(movies.join(ratings, on='movie_id', lsuffix='_left', rsuffix='_right'))

# --- Vertical / side-by-side concatenation ------------------------------------
print(pd.concat([movies, ratings]))
print(pd.concat([movies, ratings], ignore_index=True))
print(pd.concat([movies, ratings], join='inner', axis=1))
# pd.concat replaces the removed DataFrame.append (and its private _append).
stacked = pd.concat([movies, ratings, movies], ignore_index=True)
print(stacked)
Pandas数据可视化
# --- Pie charts ---------------------------------------------------------------
usa_city_population = pd.DataFrame(
    {'population': [8175133, 3792621, 2695598, 2100263, 19354922]},
    index=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
)
axs = usa_city_population.plot.pie(y='population')

# NOTE: the Chinese labels need a font with CJK glyphs to render correctly.
china_city_population = pd.DataFrame(
    {'population': [15773658, 2180357, 1208360, 1035837, 10039107]},
    index=['广州', '上海', '北京', '天津', '重庆'],
)
axs = china_city_population.plot.pie(y='population', figsize=(5, 5))

# --- Bar charts ---------------------------------------------------------------
top_city_population = pd.DataFrame(
    {
        'usa': usa_city_population['population'].values,     # label was misspelt 'uas'
        'china': china_city_population['population'].values,
    },
    index=['top1', 'top2', 'top3', 'top4', 'top5'],
)
axs = top_city_population.plot.bar(rot=0)
top_city_population.reset_index().plot.bar(x='index', y=['china'], rot=0)

# --- Line plot of a random walk -----------------------------------------------
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
axs = data.cumsum().plot()

# --- Scatter, box and area plots ----------------------------------------------
data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=['A', 'B', 'C', 'D'])
data = data.cumsum()
data.plot.scatter(x='A', y='B', color='Green', label='Class1')
# Keep the returned Axes and pass it as ax= to overlay a second scatter.
ax1 = data.plot.scatter(x='A', y='B', color='Green', label='Class1')
data.plot.scatter(x='A', y='C', color='Red', label='Class2', ax=ax1)
data.plot.box()
data = np.abs(data)                  # stacked area plots need non-negative values
axs = data.plot.area(figsize=(12, 4), subplots=True)
plt.show()
Matplotlib基础
# Sample data shared by the following examples.
x1 = np.linspace(-5, 5, 50)
y1 = 2 * x1
y2 = x1 ** 2 - 10

plt.figure(figsize=(5, 6))   # figure size in inches (width, height)
plt.plot(x1, y1)
plt.show()
plot
# Line colour, width and dash style are set through keyword arguments.
plt.plot(x1, y2, color="green", linewidth=2, linestyle='--', label='y=x^2-10')
plt.show()
plot
# Object-oriented interface: draw both curves on one Axes.
fig, axe = plt.subplots()
axe.plot(x1, y1, label='y=2x')
axe.plot(x1, y2, label='y=x^2-10')
plt.show()
subplot
# Two subplots side by side, object-oriented style.
# 121 means: 1 row, 2 columns, first cell.
fig = plt.figure()
left_ax = fig.add_subplot(121)
left_ax.plot(x1, y1)
right_ax = fig.add_subplot(122)
right_ax.plot(x1, y2)
plt.show()

# The same layout with the pyplot state-machine interface.
plt.subplot(1, 2, 1)
plt.plot(x1, y1)
plt.subplot(1, 2, 2)
plt.plot(x1, y2)
plt.show()
subplot
坐标轴和边框
x1 = np.linspace(-5, 5, 50)
y1 = 2 * x1
y2 = x1 ** 2 - 10

plt.plot(x1, y1)
plt.plot(x1, y2)
plt.title("Title", color='red', fontsize=20)
plt.show()
标题
# Axis titles.  NOTE: the Chinese labels need a font with CJK glyphs to render.
plt.xlabel('x轴')
plt.ylabel('y轴')
plt.show()
坐标轴标题
# Restrict the visible range, pyplot style.
plt.xlim((-2, 2))
plt.ylim((-5, 10))
plt.show()

# The same with the object-oriented interface.
fig, axe = plt.subplots()
axe.plot(x1, y1)
axe.plot(x1, y2)
axe.set_xlim(-2, 2)
axe.set_ylim(-5, 10)
限制坐标轴范围
fig, axe = plt.subplots()
axe.plot(x1, y1)
axe.plot(x1, y2)
axe.set_xlim(-2, 2)
axe.set_ylim(-5, 10)
axe.set_xticks(np.linspace(-2, 2, 3))     # ticks at -2, 0 and 2
# Tick marks point into the plot area and are drawn in blue.
axe.tick_params(direction='in', length=5, width=2, color='b')
设置刻度线和边框
fig, axe = plt.subplots()
axe.plot(x1, y1)
axe.plot(x1, y2)
# Move the left and bottom spines to the middle of the Axes (0.5 in
# axes-relative coordinates) and hide the other two.
axe.spines['left'].set_position(('axes', 0.5))
axe.spines['bottom'].set_position(('axes', 0.5))
axe.spines['right'].set_color('none')
axe.spines['top'].set_color('none')
设置边框
图例、标注
plt.plot(x1, y1, label='y=2x')
plt.plot(x1, y2, label='y=x^2')
# loc=0 lets Matplotlib pick the best position; entries are laid out in two columns.
plt.legend(loc=0, title="legend title", shadow=True, ncol=2, facecolor='gray')
设置图例
plt.plot(x1, y1)
plt.plot(x1, y2)
# Free-standing text placed at data coordinates (-1, 5) inside a rounded box.
plt.text(
    -1, 5, "two functions",
    family="Times New Roman",
    fontsize=12,
    style="italic",
    color="r",
    weight="black",
    bbox=dict(boxstyle="round", facecolor="none", ec="b"),
)
plt.show()
无指向性标注
plt.plot(x1, y1)
plt.plot(x1, y2)
# xy is the point being annotated, xytext is where the label sits; a curved
# dashed arrow connects the two.
plt.annotate(
    "y=2x",
    xy=(1, 2),
    xytext=(2, 0),
    arrowprops=dict(arrowstyle="->", linestyle="--", connectionstyle="arc3,rad=.5"),
    bbox=dict(boxstyle="round,pad=0.5", fc="none", ec="gray"),
)
指向性标注
多图合并、折线图、散点图、柱状图、直方图、面积图、堆叠面积图、箱型图、饼图、热力图、3D图
多图合并
from matplotlib.gridspec import GridSpec

# --- plt.subplot: fill only cells 2, 4 and 5 of a 2x3 grid ---------------------
plt.figure(figsize=(8, 5))
plt.subplot(2, 3, 2)
plt.plot([0, 10], [0, 10])
plt.title("2,3,2")
plt.subplot(2, 3, 4)
plt.plot([0, 10], [0, 10])
plt.title("2,3,4")
plt.subplot(2, 3, 5)
plt.plot([0, 10], [0, 10])
plt.title("2,3,5")
plt.tight_layout()

# --- plt.subplots: a grid of Axes, indexed [row][column] -----------------------
fig, axe = plt.subplots(nrows=2, ncols=2)
axe[0][1].plot([0, 10], [0, 10])
plt.show()

fig, axe = plt.subplots(nrows=2, ncols=2, figsize=(10, 4))
axe[1][1].plot([0, 10], [0, 10])
plt.show()

# With a single column the result is a 1-D array of Axes.
fig, axe = plt.subplots(nrows=2, ncols=1)
axe[1].plot([0, 10], [0, 10])
plt.show()

fig, axe = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
plt.tight_layout()
axe[0][0].set_title("1st subplot")
axe[1][1].set_title("4th subplot")
axe[1][0].plot([0, 10], [0, 10])
plt.tight_layout()
plt.show()

# --- Mixing grids: two cells on top, one full-width cell below -----------------
plt.subplot(2, 2, 1)
plt.title("2,2,1")
plt.plot([0, 10], [0, 10])
plt.subplot(2, 2, 2)
plt.plot([0, 10], [0, 10])
plt.title("2,2,2")            # title now matches the subplot it labels
plt.subplot(2, 1, 2)
plt.plot([0, 10], [0, 10])
plt.title("2,1,2")
plt.tight_layout()
plt.show()

# --- GridSpec: cells with unequal width / height ratios ------------------------
fig = plt.figure(dpi=100)
gs = GridSpec(2, 2, width_ratios=[1, 2], height_ratios=[3, 1])
ax1 = fig.add_subplot(gs[0])
ax1.text(0.5, 0.5, "1st plot", verticalalignment='center', ha='center')
ax1.plot([0, 1], [0, 1])
ax2 = fig.add_subplot(gs[1])
ax2.text(0.5, 0.5, "2nd plot", verticalalignment='center', ha='center')
ax3 = fig.add_subplot(gs[2])
ax3.text(0.5, 0.5, "3rd plot", verticalalignment='center', ha='center')
ax4 = fig.add_subplot(gs[3])
ax4.text(0.5, 0.5, "4th plot", verticalalignment='center', ha='center')
plt.show()

# --- Inset axes: small plots drawn inside a big one ----------------------------
x = np.linspace(-3, 3, 100)
y = np.sin(x)
fig, ax1 = plt.subplots()
ax1.plot(x, y, 'orange')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('Big 1')

# Position and size are fractions of the figure: [left, bottom, width, height].
left, bottom, width, height = 0.25, 0.6, 0.2, 0.2
ax2 = fig.add_axes([left, bottom, width, height])
# The parabola is evaluated on this block's own x (100 points); the y2 array
# built earlier has 50 points and cannot be plotted against it.
ax2.plot(x, x ** 2 - 10, 'g')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_title('small 1')

plt.axes([0.65, 0.2, 0.2, 0.25])
plt.plot(x, x ** 2, 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.title('small 2')
plt.show()
折线图
# Six parallel lines showing different line styles, colours and markers.
x = np.linspace(-5, 5, 25)
fig, axe = plt.subplots(figsize=(8, 5))
axe.plot(x, x + 1, linestyle='-', color='r', marker='x', label="l1")
axe.plot(x, x + 2, linestyle='--', color='y', marker='s', label="l2")
axe.plot(x, x + 3, linestyle='-.', color='m', marker='|', label="l3")
axe.plot(x, x + 4, linestyle=':', color='g', marker='v', label="l4")
axe.plot(x, x + 5, linestyle='-', color='b', marker='*', label="l5")
axe.plot(x, x + 6, linestyle='-', color='c', marker='o', label="l6")
axe.legend()
plt.show()

# Several (x, y, format-string) triples can be passed in one plot() call.
x = np.linspace(-4, 4, 100)
fig, axe = plt.subplots(figsize=(8, 5))
axe.plot(x, np.sin(x), '--b', x + 1, np.sin(x), '.r')
plt.show()
散点图
fig, axe = plt.subplots(figsize=(8, 5))
rng = np.random.RandomState(66)      # fixed seed for a reproducible figure
x = rng.randn(50)
y = rng.randn(50)
colors = rng.randn(50)
# Marker sizes are areas and must be non-negative; randn also yields negative
# values, so take the absolute value.
sizes = np.abs(rng.randn(50)) * 500
axe.grid()
axe.scatter(x, y, c=colors, s=sizes, alpha=0.5)
plt.show()
柱状图
# --- Grouped bars ------------------------------------------------------------------
fig, axe = plt.subplots()
label = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
index = np.arange(len(label))
values1 = [100, 150, 300, 220, 660, 320]
values2 = [200, 160, 200, 300, 800, 400]
axe.bar(index, values1, width=0.4)
axe.bar(index + 0.4, values2, width=0.4)
# The two bars of a group are centred on index and index + 0.4, so the middle
# of the group lies at index + 0.2.
axe.set_xticks(index + 0.2)
axe.set_xticklabels(label)
plt.show()

# --- Stacked bars: the second series starts on top of the first --------------------
fig, axe = plt.subplots()
label = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
index = np.arange(len(label))
values1 = [100, 150, 300, 220, 660, 320]
values2 = [200, 160, 200, 300, 800, 400]
axe.bar(index, values1)
axe.bar(index, values2, bottom=values1)
plt.show()

# --- Horizontal stacked bars -------------------------------------------------------
fig, axe = plt.subplots()
label = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
index = np.arange(len(label))
values1 = [100, 150, 300, 220, 660, 320]
values2 = [200, 160, 200, 300, 800, 400]
axe.barh(index, values1)
axe.barh(index, values2, left=values1)
axe.set_yticks(index)
axe.set_yticklabels(label)
plt.show()
直方图
# --- Three overlaid, normalised histograms --------------------------------------
fig, axe = plt.subplots(figsize=(8, 5))
np.random.seed(66)
data1 = np.random.normal(-1, 1, 5000)
data2 = np.random.normal(-2, 1, 5000)
data3 = np.random.normal(-1, 2, 5000)
axe.hist(data1, bins=50, density=True, alpha=0.35, label="data1")
axe.hist(data2, bins=50, density=True, alpha=0.35, label="data2")
axe.hist(data3, bins=50, density=True, alpha=0.35, label="data3")
axe.legend()
plt.show()

# --- Histogram with the theoretical normal density on top -----------------------
fig, axe = plt.subplots(figsize=(8, 5))
sigma = 1
mu = 0
np.random.seed(66)
data = np.random.normal(mu, sigma, 5000)
n, bins, _ = axe.hist(data, bins=50, alpha=0.35, density=True)
# Normal pdf evaluated at the bin edges:
#   1 / (sqrt(2*pi) * sigma) * exp(-0.5 * ((x - mu) / sigma) ** 2)
y = ((1 / (np.sqrt(2 * np.pi) * sigma)) *
     np.exp(-0.5 * (1 / sigma * (bins - mu)) ** 2))
axe.plot(bins, y, '--r')
plt.show()
面积图
# --- Area between a curve and the x axis, coloured by sign -----------------------
fig, axe = plt.subplots(figsize=(8, 5))
x = np.arange(0, 5, 0.01)
y = np.sin(x * np.pi)
axe.fill_between(x, y, where=(y > 0), facecolor='b', alpha=0.7)
axe.fill_between(x, y, where=(y < 0), facecolor='g', alpha=0.7)
plt.show()

# --- Stacked area charts with two different baselines ----------------------------
fig, axe = plt.subplots(nrows=2, figsize=(8, 5))
x = [1, 2, 3, 4, 5, 6]
y = [1, 3, 5, 7, 9, 11]
np.random.seed(66)
# list + ndarray is an element-wise NumPy addition, so every series is the
# base trend plus random noise.
y1 = y + np.random.randint(1, 5, 6)
y2 = y + np.random.randint(1, 8, 6)
y3 = y + np.random.randint(1, 5, 6)
y4 = y + np.random.randint(1, 30, 6)
y5 = y + np.random.randint(1, 5, 6)
y6 = y + np.random.randint(1, 20, 6)
y7 = y + np.random.randint(1, 10, 6)
labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]

axe[0].stackplot(x, y1, y2, y3, y4, y5, y6, y7, baseline="sym")
axe[0].set_xticks(x)
axe[0].set_xticklabels(labels)
axe[0].set_title("Symmetric")

axe[1].stackplot(x, y1, y2, y3, y4, y5, y6, y7, baseline="wiggle")
axe[1].set_xticks(x)
axe[1].set_xticklabels(labels)
axe[1].set_title("Wiggle")

plt.tight_layout()
plt.show()
箱型图
fig, axe = plt.subplots(figsize=(8, 5))
np.random.seed(66)
labels = ["Label1", "Label2", "Label3", "Label4"]
values = []
values.append(np.random.normal(100, 20, 200))
values.append(np.random.normal(100, 100, 200))
values.append(np.random.normal(150, 50, 200))
values.append(np.random.normal(150, 70, 200))

# Three variants of the same box plot.
# NOTE(review): all three are drawn on the same Axes and therefore overlap;
# presumably they are alternatives, of which only one should be kept.
axe.boxplot(values, labels=labels)                 # default: vertical boxes
axe.boxplot(values, labels=labels, vert=False)     # horizontal boxes
axe.boxplot(values, labels=labels, patch_artist=True,
            boxprops=dict(facecolor='teal', color='r'))   # filled, coloured boxes
plt.show()
饼图
# Two concentric ring ("donut") charts drawn on the same Axes.
fig, axe = plt.subplots(figsize=(8, 5))
labels = ["P1", "P2", "P3", "P4", "P5", "P6"]
labels2 = ["S1", "S2", "S3"]
values = [200, 300, 88, 66, 110, 168]
values2 = [500, 100, 200]
explode = [0, 0, 0, 0.3, 0, 0]     # pull the 4th outer wedge outwards
explode2 = [0, 0.1, 0]

# Outer ring: wedgeprops width < radius leaves a hole in the middle.
axe.pie(values, radius=1.5, wedgeprops=dict(width=0.5),
        autopct='%.2f%%', pctdistance=0.8,
        labels=labels, labeldistance=1.05,
        explode=explode)
# Inner ring.
axe.pie(values2, radius=1, wedgeprops=dict(width=0.5),
        autopct='%.2f%%', pctdistance=0.8,
        labels=labels2, labeldistance=0.3,
        explode=explode2)
plt.show()
热力图
# The listing never defined its inputs; these are placeholder sample data
# (one row per y label, one column per x label) -- replace with real values.
xlabels = ["Mon", "Tue", "Wed", "Thu", "Fri"]
ylabels = ["A", "B", "C", "D"]
values = np.random.RandomState(66).randint(0, 100, size=(len(ylabels), len(xlabels)))

fig, axe = plt.subplots(figsize=(8, 5))
axe.set_xticks(np.arange(len(xlabels)))
axe.set_yticks(np.arange(len(ylabels)))
axe.set_xticklabels(xlabels)
axe.set_yticklabels(ylabels)
im = axe.imshow(values)

# Write each cell's value at its centre.  text() takes (x, y) = (column, row),
# while the array is indexed [row, column].
for row in range(len(ylabels)):
    for col in range(len(xlabels)):
        text = axe.text(col, row, values[row, col],
                        horizontalalignment="center",
                        verticalalignment="center",
                        color="w")
axe.figure.colorbar(im, ax=axe)
plt.show()
3D图
# 3-D surface z = cos(sqrt(x^2 + y^2)) on a 200 x 200 grid.
fig = plt.figure(figsize=(8, 5))
axes = plt.axes(projection="3d")
X = np.linspace(-5, 5, 200)
Y = np.linspace(-5, 5, 200)
X, Y = np.meshgrid(X, Y)            # 1-D coordinates -> 2-D coordinate grids
Z = np.cos(np.sqrt(X ** 2 + Y ** 2))
surf = axes.plot_surface(X, Y, Z, cmap=plt.get_cmap("plasma"))
plt.colorbar(surf)
plt.show()