Python基础算法库Numpy及可视化库使用实践-大数据ML样本集案例实战

栏目: Python · 发布时间: 7年前

内容简介：版权声明：本套技术专栏是作者（秦凯新）平时工作的总结和升华，通过从真实商业环境抽取案例进行总结和分享，并给出商业应用的调优建议和集群环境容量规划等内容，请持续关注本套博客。QQ邮箱地址：1120746959@qq.com，如有任何学术交流，可随时联系。

1 Numpy详细使用

读取txt文件

import numpy
  world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")
  print(type(world_alcohol))

  world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype="U75", skip_header=1)
  print(world_alcohol)
  
  [[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
   [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
   [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
   ..., 
   [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
   [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
   [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]
复制代码

创建一维和二维的Array数组

#The numpy.array() function can take a list or list of lists as input. When we input a list, we get a one-dimensional array as a result:
  
  #一维的Array数组[]
  vector = numpy.array([5, 10, 15, 20])
  
  #二维的Array数组[[],[],[]]
  matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
  print vector
  print matrix
复制代码

shape用法

#We can use the ndarray.shape property to figure out how many elements are in the array
  vector = numpy.array([1, 2, 3, 4])
  print(vector.shape)
  
  #For matrices, the shape property contains a tuple with 2 elements.
  matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
  print(matrix.shape)
  
  (4,)
  (2, 3)
复制代码

dtype用法（numpy要求numpy.array内部所有元素的数据类型相同）

numbers = numpy.array([1, 2, 3, 4])
  numbers.dtype
  
  dtype('int32')
  
  #当其中一个元素的类型不同时，所有元素都会被转换为同一种类型（这里全部变成字符串）
  numbers = numpy.array([1, 2, 3, '4'])
  print(numbers)
  numbers.dtype
  
 
  ['1' '2' '3' '4']
   dtype('<U11')
复制代码

索引定位

[[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
   [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
   [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
   ..., 
   [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
   [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
   [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]
   
  uruguay_other_1986 = world_alcohol[1,4]
  third_country = world_alcohol[2,2]
  print uruguay_other_1986
  print third_country
  
  0.5
  Cte d'Ivoire
复制代码

索引切片

vector = numpy.array([5, 10, 15, 20])
  print(vector[0:3])  
  [ 5 10 15]
复制代码

取某一列（：表示所有行）

matrix = numpy.array([
                      [5, 10, 15], 
                      [20, 25, 30],
                      [35, 40, 45]
                   ])
  print(matrix[:,1])
  
  [10 25 40]

  matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  print(matrix[:,0:2])
  
  [[ 5 10]
   [20 25]
   [35 40]]
   
  matrix = numpy.array([
              [5, 10, 15], 
              [20, 25, 30],
              [35, 40, 45]
           ])
  print(matrix[1:3,0:2])
  
  [[20 25]
  [35 40]]
复制代码

对Array操作表示对内部所有元素进行操作

import numpy
  #it will compare the second value to each element in the vector
  # If the values are equal, the Python interpreter returns True; otherwise, it returns False
  vector = numpy.array([5, 10, 15, 20])
  vector == 10
  
  array([False,  True, False, False], dtype=bool)
  
  matrix = numpy.array([
              [5, 10, 15], 
              [20, 25, 30],
              [35, 40, 45]
           ])
  matrix == 25
  
  array([[False, False, False],
 [False,  True, False],
 [False, False, False]], dtype=bool)
复制代码

布尔值当索引（[False True False False]）

vector = numpy.array([5, 10, 15, 20])
  equal_to_ten = (vector == 10)
  print equal_to_ten
  print(vector[equal_to_ten])
  
  [False  True False False]
  [10]


  #矩阵表示索引
  matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  second_column_25 = (matrix[:,1] == 25)
  print second_column_25
  print(matrix[second_column_25, :])
  
  [False  True False]
  [[20 25 30]]
复制代码

对数组进行与（&）、或（|）运算

#We can also perform comparisons with multiple conditions
  vector = numpy.array([5, 10, 15, 20])
  equal_to_ten_and_five = (vector == 10) & (vector == 5)
  print equal_to_ten_and_five
  
  [False False False False]
  
  
  vector = numpy.array([5, 10, 15, 20])
  equal_to_ten_or_five = (vector == 10) | (vector == 5)
  print equal_to_ten_or_five
  
  [ True  True False False]
复制代码

值类型转换

vector = numpy.array(["1", "2", "3"])
  print vector.dtype
  print vector
  vector = vector.astype(float)
  print vector.dtype
  print vector
  
  |S1
  ['1' '2' '3']
  float64
  [ 1.  2.  3.]
复制代码

聚合求解

vector = numpy.array([5, 10, 15, 20])
  vector.sum()
复制代码

按行求和（axis=1）

matrix = numpy.array([
                 [5, 10, 15], 
                 [20, 25, 30],
                 [35, 40, 45]
              ])
 matrix.sum(axis=1)
 array([ 30,  75, 120])
复制代码

按列求和（axis=0）

matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  matrix.sum(axis=0)
  array([60, 75, 90])
复制代码

矩阵操作np.arange(N)生成0到N-1的整数

import numpy as np
  a = np.arange(15).reshape(3, 5)
  a

  array([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]])
         
  a.ndim
  2
  
  a.dtype.name
  'int32'
  
  a.size
  15
复制代码

矩阵初始化

np.zeros ((3,4)) 
  
  array([[ 0.,  0.,  0.,  0.],
 [ 0.,  0.,  0.,  0.],
 [ 0.,  0.,  0.,  0.]])
 

  np.ones( (2,3,4), dtype=np.int32 )
  
  array([[[1, 1, 1, 1],
  [1, 1, 1, 1],
  [1, 1, 1, 1]],

 [[1, 1, 1, 1],
  [1, 1, 1, 1],
  [1, 1, 1, 1]]])
复制代码

按照间隔生成数据

np.arange( 10, 30, 5 )
  array([10, 15, 20, 25])

  np.arange( 0, 2, 0.3 )
  array([ 0. ,  0.3,  0.6,  0.9,  1.2,  1.5,  1.8])
复制代码

随机生成数据

np.random.random((2,3))
  
  array([[ 0.40130659,  0.45452825,  0.79776512],
 [ 0.63220592,  0.74591134,  0.64130737]])
复制代码

linspace在0到2pi之间取100个数

from numpy import pi
  np.linspace( 0, 2*pi, 100 )

  array([ 0.    ,  0.06346652,  0.12693304,  0.19039955,  0.25386607,
      0.31733259,  0.38079911,  0.44426563,  0.50773215,  0.57119866,
      0.63466518,  0.6981317 ,  0.76159822,  0.82506474,  0.88853126,
      0.95199777,  1.01546429,  1.07893081,  1.14239733,  1.20586385,
      1.26933037,  1.33279688,  1.3962634 ,  1.45972992,  1.52319644,
      1.58666296,  1.65012947,  1.71359599,  1.77706251,  1.84052903,
      1.90399555,  1.96746207,  2.03092858,  2.0943951 ,  2.15786162,
      2.22132814,  2.28479466,  2.34826118,  2.41172769,  2.47519421,
      2.53866073,  2.60212725,  2.66559377,  2.72906028,  2.7925268 ,
      2.85599332,  2.91945984,  2.98292636,  3.04639288,  3.10985939,
      3.17332591,  3.23679243,  3.30025895,  3.36372547,  3.42719199,
      3.4906585 ,  3.55412502,  3.61759154,  3.68105806,  3.74452458,
      3.8079911 ,  3.87145761,  3.93492413,  3.99839065,  4.06185717,
      4.12532369,  4.1887902 ,  4.25225672,  4.31572324,  4.37918976,
      4.44265628,  4.5061228 ,  4.56958931,  4.63305583,  4.69652235,
      4.75998887,  4.82345539,  4.88692191,  4.95038842,  5.01385494,
      5.07732146,  5.14078798,  5.2042545 ,  5.26772102,  5.33118753,
      5.39465405,  5.45812057,  5.52158709,  5.58505361,  5.64852012,
      5.71198664,  5.77545316,  5.83891968,  5.9023862 ,  5.96585272,
      6.02931923,  6.09278575,  6.15625227,  6.21971879,  6.28318531])
复制代码

矩阵基本操作

#the product operator * operates elementwise in NumPy arrays
  a = np.array( [20,30,40,50] )
  b = np.arange( 4 )
  print (a)
  print (b)
  #b
  c = a-b
  print (c)
  b**2
  print (b**2)
  print (a<35)
  
  [20 30 40 50]
  [0 1 2 3]
  [20 29 38 47]
  [0 1 4 9]
  [ True  True False False]
复制代码

矩阵相乘

#The matrix product can be performed using the dot function or method
  A = np.array([[1,1],
                 [0,1]] )
  B = np.array([[2,0],
                 [3,4]])
  print (A)
  print (B)
  print (A*B)
  
  print (A.dot(B))
  print (np.dot(A, B) )
  
  [[1 1]
   [0 1]]
   
  [[2 0]
   [3 4]]
   
  [[2 0]
   [0 4]]
   
  [[5 4]
   [3 4]]
   
  [[5 4]
   [3 4]]
复制代码

矩阵操作floor向下取整

import numpy as np
  B = np.arange(3)
  print (B)
  #print np.exp(B)
  print (np.sqrt(B))
  
  [0 1 2]
  [0.         1.         1.41421356]
  
  #Return the floor of the input
  a = np.floor(10*np.random.random((3,4)))
  #print a
  
  #Return the floor of the input
  a = np.floor(10*np.random.random((3,4)))
  print (a)
  
  print(a.reshape(2,-1))
  
  [[0. 4. 2. 2.]
   [8. 1. 5. 7.]
   [0. 9. 7. 4.]]
   
  [[0. 4. 2. 2. 8. 1.]
   [5. 7. 0. 9. 7. 4.]]
复制代码

hstack矩阵拼接

a = np.floor(10*np.random.random((2,2)))
  b = np.floor(10*np.random.random((2,2)))
  print a
  print '---'
  print b
  print '---'
  print np.hstack((a,b))
  
  [[ 5.  6.]
   [ 1.  5.]]
  ---
  [[ 8.  6.]
   [ 9.  0.]]
  ---
  [[ 5.  6.  8.  6.]
   [ 1.  5.  9.  0.]]

  a = np.floor(10*np.random.random((2,2)))
  b = np.floor(10*np.random.random((2,2)))
  print (a)
  print ('---')
  print (b)
  print ('---')
  #print np.hstack((a,b))
  np.vstack((a,b))
  
  [[7. 7.]
   [2. 6.]]
  ---
  [[0. 6.]
   [0. 3.]]
  ---
 array([[7., 7.],
 [2., 6.],
 [0., 6.],
 [0., 3.]])

  a = np.floor(10*np.random.random((2,12)))
  print (a)
  print (np.hsplit(a,3))
  
  [[6. 5. 2. 4. 2. 4. 9. 4. 4. 6. 8. 9.]
   [8. 4. 0. 2. 6. 5. 2. 5. 0. 4. 1. 6.]]
  [array([[6., 5., 2., 4.],
         [8., 4., 0., 2.]]), array([[2., 4., 9., 4.],
         [6., 5., 2., 5.]]), array([[4., 6., 8., 9.],
         [0., 4., 1., 6.]])]
复制代码

任意选择切分位置

print ( np.hsplit(a,(3,4)))   # Split a after the third and the fourth column
  
  [[2. 8. 4.    7.    6. 6. 5. 8. 8. 3. 0. 1.]
   [3. 5. 9.    4.    5. 8. 7. 6. 2. 3. 8. 4.]]
  
  [array([[2., 8., 4.],
  [3., 5., 9.]]), array([[7.],
  [4.]]), array([[6., 6., 5., 8., 8., 3., 0., 1.],
  [5., 8., 7., 6., 2., 3., 8., 4.]])]
复制代码

变量赋值
变量视图

copy实现变量之间没有关系

d = a.copy() 
  d is a
  d[0,0] = 9999
  print d 
  print a

  [[9999    1    2    3]
   [1234    5    6    7]
   [   8    9   10   11]]
  [[   0    1    2    3]
   [1234    5    6    7]
   [   8    9   10   11]]
复制代码

寻找列最大值索引

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络，本站转载出于传递更多信息之目的，版权归原作者或者来源机构所有，如转载稿涉及版权问题，请联系我们。

码农书籍

Java语言程序设计

（美） Y. Daniel Liang / 李娜 / 机械工业出版社 / 2011-6 / 75.00元

本书是Java语言的经典教材，多年来畅销不衰。本书全面整合了Java 6的特性，采用“基础优先，问题驱动”的教学方式，循序渐进地介绍了程序设计基础、解决问题的方法、面向对象程序设计、图形用户界面设计、异常处理、I/O和递归等内容。此外，本书还全面且深入地覆盖了一些高级主题，包括算法和数据结构、多线程、网络、国际化、高级GUI等内容。本书中文版由《Java语言程序设计：基础篇》和《Java语......一起来看看《Java语言程序设计》这本书的介绍吧!

码农工具

Python基础算法库Numpy及可视化库使用实践-大数据ML样本集案例实战

1 Numpy详细使用

Java语言程序设计

CSS 压缩/解压工具

RGB转16进制工具

图片转BASE64编码