mongodb的聚合操作

mongodb的数据库聚合操作

数据库基础


原始数据

db.mycol.insert([
{
   title: 'MongoDB Overview', 
   description: 'MongoDB is no sql database',
   by_user: 'w3cschool.cc',
   url: 'http://www.w3cschool.cc',
   tags: ['mongodb', 'database', 'NoSQL'],
   likes: 100
},
{
   title: 'NoSQL Overview', 
   description: 'No sql database is very fast',
   by_user: 'w3cschool.cc',
   url: 'http://www.w3cschool.cc',
   tags: ['mongodb', 'database', 'NoSQL'],
   likes: 10
},
{
   title: 'Neo4j Overview', 
   description: 'Neo4j is no sql database',
   by_user: 'Neo4j',
   url: 'http://www.neo4j.com',
   tags: ['neo4j', 'database', 'NoSQL'],
   likes: 750
}])

把by_user字段的数据分到一个组,统计每个组的记录个数

> db.mycol.aggregate(
[
  {
    $group: {
      _id: "$by_user",
      num_tutorial: {
        $sum: 1
      }
    }
  }
]

求出每个作者的好评数

> db.mycol.aggregate([{$group:{_id :'$by_user', count:{$sum:'$likes'}}}])

获取表中所有好评数

> db.mycol.aggregate([{$group:{_id :null, count:{$sum:'$likes'}}}])
{ "_id" : null, "count" : 860 }

求出所有记录的平均好评数

> db.mycol.aggregate([{$group:{_id :null, avg:{$avg:'$likes'}}}])
{ "_id" : null, "avg" : 286.6666666666667 }

求出每个作者的平均好评

> db.mycol.aggregate([{$group:{_id:'$by_user', avg_likes:{$avg:'$likes'}}}])
{ "_id" : "Neo4j", "avg_likes" : 750 }
{ "_id" : "w3cschool.cc", "avg_likes" : 55 }

找出最差评分的作者

> db.mycol.aggregate({$group:{_id:null, min_like:{$min:'$likes'}}})
{ "_id" : null, "min_like" : 10 }

分组后把每组的url添加到数组中

db.mycol.aggregate([{$group : {_id : "$by_user", url : {$push: "$url"}}}])

$project

修改输入文档的结构。可以用来重命名、增加或删除字段(域),也可以用于创建计算结果以及嵌套文档。

只显示相关字段 相当于投影

> db.books.aggregate({$project:{ title:1,author:1,_id:0}})
{ "title" : "abc123", "author" : { "last" : "zzz", "first" : "aaa" } }

选择字典中的键 控制显影

> db.books.aggregate({$project:{ title:1,_id:0,'author.last':1}})
{ "title" : "abc123", "author" : { "last" : "zzz" } }

字符串可以部分截取

> db.books.aggregate({$project:{_id:0,title:1,authorfisrtName:'$author.first',isbn:{$substr:['$isbn',1,3]}}})
{ "title" : "abc123", "isbn" : "001", "authorfisrtName" : "aaa" }

$match

原始数据

db.articles.insert([
{ "_id" : 1, "author" : "dave", "score" : 80, "views" : 100 },
{ "_id" : 2, "author" : "dave", "score" : 85, "views" : 521 },
{ "_id" : 3, "author" : "ahn", "score" : 60, "views" : 1000 },
{ "_id" : 4, "author" : "li", "score" : 55, "views" : 5000 },
{ "_id" : 5, "author" : "annT", "score" : 60, "views" : 50 },
{ "_id" : 6, "author" : "li", "score" : 94, "views" : 999 },
{ "_id" : 7, "author" : "ty", "score" : 95, "views" : 1000 }
])

找出名字是dave的记录

> db.articles.aggregate([{$match:{'author':'dave'}}])
{ "_id" : 1, "author" : "dave", "score" : 80, "views" : 100 }
{ "_id" : 2, "author" : "dave", "score" : 85, "views" : 521 }

找出 70-90的数据或者1000 的数据 再统计个数

db.articles.aggregate(
[
  {
    $match: {
      $or: [
        {
          score: {
            $gt: 70,
            $lt: 90
          }
        },
        {
          views: {
            $gte: 1000
          }
        }
      ]
    }
  },
  {
    $group: {
      _id: null,
      count: {
        $sum: 1
      }
    }
  }
]
 )

group

原始数据

db.sales.aggregate(
   [
      {
        $group : {
           _id : { month: { $month: "$date" }, day: { $dayOfMonth: "$date" }, year: { $year: "$date" } },
           averageQuantity: { $avg: "$quantity" },
           count: { $sum: 1 }
        }
      }
   ]
)

按照年月日分组 统计每个分组的数 平均数量

db.sales.aggregate(
   [
      {
        $group : {
           _id : { month: { $month: "$date" }, day: { $dayOfMonth: "$date" }, year: { $year: "$date" } },
           averageQuantity: { $avg: "$quantity" },
           count: { $sum: 1 }
        }
      }
   ]
)

如果是_id = null 全部数据都会到组中

db.sales.aggregate(
   [
      {
        $group : {
           _id : null,
           totalPrice: { $sum: { $multiply: [ "$price", "$quantity" ] } },
           averageQuantity: { $avg: "$quantity" },
           count: { $sum: 1 }
        }
      }
   ]
)

相当于找出item去重

db.sales.aggregate( [ { $group : { _id : "$item" } } ] )

分类作者 找出书籍

db.books.aggregate(
   [
     { $group : { _id : "$author", books: { $push: "$title" } } }
   ]
)

unwind

拆分数组

db.inventory.insert(
{ "_id" : 1, "item" : "ABC1", sizes: [ "S", "M", "L"] }
)
db.inventory.aggregate( [
   { $unwind: { path: "$sizes", preserveNullAndEmptyArrays: true } }
] )
{ "_id" : 1, "item" : "ABC", "sizes" : "S" }
{ "_id" : 1, "item" : "ABC", "sizes" : "M" }
{ "_id" : 1, "item" : "ABC", "sizes" : "L" }
{ "_id" : 2, "item" : "EFG" }
{ "_id" : 3, "item" : "IJK", "sizes" : "M" }
{ "_id" : 4, "item" : "LMN" }
{ "_id" : 5, "item" : "XYZ", "sizes" : null }

includeArrayIndex: "arrayIndex"可以为每个数组中的数据添加 在数组中的下标

preserveNullAndEmptyArrays: true 有[],null,没有字典的都会显示 一般时候[]和没有的不显示

lookup表链接

{
   $lookup:
     {
       from: <collection to join>,
       localField: <field from the input documents>,
       foreignField: <field from the documents of the "from" collection>,
       as: <output array field>
     }
}

from链接那个表,local表中那个字段,foreign参考表字段。 as 放到一个数组中

db.orders.insert([
{ "_id" : 1, "item" : "abc", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1 },
{ "_id" : 3  }
])
db.inventory.insert([
{ "_id" : 1, "sku" : "abc", description: "product 1", "instock" : 120 },
{ "_id" : 2, "sku" : "def", description: "product 2", "instock" : 80 },
{ "_id" : 3, "sku" : "ijk", description: "product 3", "instock" : 60 },
{ "_id" : 4, "sku" : "jkl", description: "product 4", "instock" : 70 },
{ "_id" : 5, "sku": null, description: "Incomplete" },
{ "_id" : 6 }
])
db.orders.aggregate([
    {
      $lookup:
        {
          from: "inventory",
          localField: "item",
          foreignField: "sku",
          as: "inventory_docs"
        }
   }
]).pretty()

结果 是匹配到的放到数组中

{
  "_id" : 1,
   "item" : "abc",
  "price" : 12,
  "quantity" : 2,
  "inventory_docs" : [
    { "_id" : 1, "sku" : "abc", description: "product 1", "instock" : 120 }
  ]
}
{
  "_id" : 2,
  "item" : "jkl",
  "price" : 20,
  "quantity" : 1,
  "inventory_docs" : [
    { "_id" : 4, "sku" : "jkl", "description" : "product 4", "instock" : 70 }
  ]
}
{
  "_id" : 3,
  "inventory_docs" : [
    { "_id" : 5, "sku" : null, "description" : "Incomplete" },
    { "_id" : 6 }
  ]
}

mongodump–数据库的出和导入

导出一个数据库 就是备份

>mongodump -h dbhost -d dbname -o dbdirectory

导出一个集合

python@ubuntu:/home/mongodump$ sudo mongodump -h 192.168.17.129:27017 -d example -c bat_books  -o /home/mongodump/

恢复数据库

>mongorestore -h dbhost -d dbname --dir dbdirectory

导入一个集合

python@ubuntu:/home/mongodump$ mongorestore -h 192.168.17.129:27017 -d example_restore -c books_restore  --dir /home/mongodump/example/bat_books.bson
本文总阅读量