MongoDB aggregate framework very slow when have group
I am trying to make an aggregate query with "group"
to get the total of results.
The total of "requested_items"
(my results) are +- 1.900.000.
If I execute with "group", the query is very slow (+- 300sec).
If I execute without "group", the query is very fast (+- 1sec).
What am I doing wrong?
The sample code is below.
SLOW QUERY
db.minute.aggregate([
{ $match: {
$and: [
{ "status": "Homologado" },
{ "requested_items.status": /aceito/i },
]
} },
{ $sort: {'_id': 1}},
{ $unwind: "$requested_items" },
{ $unwind: "$requested_items.winner" },
{ $match: {
$and: [
{ "status": "Homologado" },
{ "requested_items.status": /aceito/i },
]
} },
{ $project: {
"_id": 1
} },
{ $group: {
"_id" : null,
"total" : {$sum: 1},
} },
], {allowDiskUse: true});
FAST QUERY
db.minute.aggregate([
{ $match: {
$and: [
{ "status": "Homologado" },
{ "requested_items.status": /aceito/i },
]
} },
{ $sort: {'_id': 1}},
{ $unwind: "$requested_items" },
{ $unwind: "$requested_items.winner" },
{ $match: {
$and: [
{ "status": "Homologado" },
{ "requested_items.status": /aceito/i },
]
} },
{ $project: {
"_id": 1
} },
], {allowDiskUse: true});
DB STRUCTURE
{
"_id" : "12345678ABCD",
"field_1" : [
{
"a" : null,
"b" : "ABC"
},
{
"code" : null,
"b" : "ABCD"
}
],
"status" : "Homologado",
"initial_date" : ISODate("2016-05-24T11:31:00.000Z"),
"field_2" : [
{
"a" : "ABC",
"b" : "ABCDE"
},
{
"a" : "ABCF",
"b" : "ABCDEF"
}
],
"field_3" : "Lorem ipsum dolor sit amet...",
"field_4" : [
{
"date" : ISODate("2016-05-24T13:54:48.000Z"),
"a" : "Text",
"b" : "More text..."
}
],
"field_4" : 12312321,
"field_5" : ISODate("2016-05-24T13:55:00.000Z"),
"field_6" : "ABCD",
"requested_items" : [
{
"status" : " Aceito e Habilitado",
"field_a" : "Text...",
"winner" : [
{
"a" : "23213.213213.23/232-23",
"b" : 130446,
"c" : 543223,
"d" : NumberLong(2),
"e" : "ABC 123 FULANO",
"f" : "text",
"g" : {
"description" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : 130446,
"c" : 543223,
"d" : NumberLong(2),
"e" : "ABC 123 FULANO",
"f" : "text",
"g" : {
"description" : "TEXT TEXT TEXT"
}
}
],
"field_c" : {
"_id" : ObjectId("5744dd3271af88052f0cc343"),
"a" : "TEXT",
"b" : "TEXT"
},
"field_d" : NumberLong(2),
"field_e" : 5223,
"field_f" : "Não",
"field_g" : "-",
"field_h" : {
"field_a1" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a2" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a3" : {},
"field_a4" : [
{
"date" : ISODate("2016-05-24T11:34:32.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:12:54.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:48:21.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:38.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:47.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:01:36.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:15:02.000Z"),
"A" : "TEXT",
"B" : "TEXT"
}
]
},
"field_i" : "Não",
"field_j" : 1
},
{
"status" : " Aceito e Habilitado",
"field_a" : "Text...",
"winner" : [
{
"a" : "23213.213213.23/232-23",
"b" : 130446,
"c" : 543223,
"d" : NumberLong(2),
"e" : "ABC 123 FULANO",
"f" : "text",
"g" : {
"description" : "TEXT TEXT TEXT"
}
}
],
"field_c" : {
"_id" : ObjectId("5744dd3271af88052f0cc343"),
"a" : "TEXT",
"b" : "TEXT"
},
"field_d" : NumberLong(2),
"field_e" : 5223,
"field_f" : "Não",
"field_g" : "-",
"field_h" : {
"field_a1" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a2" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a3" : {},
"field_a4" : [
{
"date" : ISODate("2016-05-24T11:34:32.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:12:54.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:48:21.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:38.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:47.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:01:36.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:15:02.000Z"),
"A" : "TEXT",
"B" : "TEXT"
}
]
},
"field_i" : "Não",
"field_j" : 2
},
{
"status" : " Aceito e Habilitado",
"field_a" : "Text...",
"winner" : [
{
"a" : "23213.213213.23/232-23",
"b" : 130446,
"c" : 543223,
"d" : NumberLong(2),
"e" : "ABC 123 FULANO",
"f" : "text",
"g" : {
"description" : "TEXT TEXT TEXT"
}
}
],
"field_c" : {
"_id" : ObjectId("5744dd3271af88052f0cc343"),
"a" : "TEXT",
"b" : "TEXT"
},
"field_d" : NumberLong(2),
"field_e" : 5223,
"field_f" : "Não",
"field_g" : "-",
"field_h" : {
"field_a1" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a2" : [
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
},
{
"a" : "23213.213213.23/232-23",
"b" : ISODate("2016-05-23T23:54:21.000Z"),
"c" : 103432446,
"d" : 522343,
"e" : "Sim",
"f" : NumberLong(2),
"g" : "TEXT TEXT TEXT",
"h" : "Sim",
"i" : {
"a" : "TEXT TEXT TEXT"
}
}
],
"field_a3" : {},
"field_a4" : [
{
"date" : ISODate("2016-05-24T11:34:32.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:12:54.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:48:21.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:38.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T12:55:47.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:01:36.000Z"),
"A" : "TEXT",
"B" : "TEXT"
},
{
"date" : ISODate("2016-05-24T13:15:02.000Z"),
"A" : "TEXT",
"B" : "TEXT"
}
]
},
"field_i" : "Não",
"field_j" : 3
},
],
"field_7" : "TEXT",
"field_8" : {
"a" : "TEXT",
"b" : "TEXT",
"c" : "324234",
"d" : "TEXT TEXT TEXT TEXT"
},
"field_9" : 43234
}
EXPLAIN
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"$and" : [
{
"status" : "Homologado"
},
{
"requested_items.status" : /aceito/i
}
]
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "module_database.minute",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"status" : {
"$eq" : "Homologado"
}
},
{
"requested_items.status" : /aceito/i
}
]
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"status" : {
"$eq" : "Homologado"
}
},
{
"requested_items.status" : /aceito/i
}
]
},
"direction" : "forward"
},
"rejectedPlans" : []
}
}
},
{
"$unwind" : {
"path" : "$requested_items"
}
},
{
"$unwind" : {
"path" : "$requested_items.winner"
}
},
{
"$match" : {
"$and" : [
{
"status" : "Homologado"
},
{
"requested_items.status" : /aceito/i
}
]
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"numberOfdocs" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
My server is:
OS: UBUNTU14/64
CPU: 6
RAM: 16 GB
Total Storage: 80 GB
Running just the tests of my question.
Finally solved the problem on my query with group. It was a error of design patterns. Thinking in SQL world, i designed the collections before think in my app. As result, slow queries.
To resolve it was need redesign my collections and put the relevant data in first level of my docs. In my searchs, I found that on Aggragation, index needs to be in first stage of pipeline. If i use a field with index after the stage $unwind, it is not considered.
Besides that, i create a int hash for text fields using the package https://github.com/darkskyapp/string-hash. So, my text fields can be indexed.
So my queries changed 300s for 5s.
it is hard to determine speed as we don't have environment details. What you could try to see how explain is predicting you query by adding:
{
explain:true
}
to your aggregation query db.coll.aggregate([pipeline], {explain:true},{allowDiskUse: true})
. What also need to be considered that $unwind
doubles amount of documents to process.
As you re going to count amount of documents -> it could be faster just take size of an array (after first unwind) and sum it later
db.inventory.aggregate(
[
{
$group: {
_id: null,
numberOfdocs: { $sum:{$size: "$requested_items.winner" }}
}
}
]
)
EDIT
after playing with this query I was able to reduce it execution time circa 45%. The main point is to skip second $match
as this scans full result set , so last $group
contains all data and we can filter out what's needed at the end as this operation is done on a small result set.
db.coll.aggregate([{
$match : {
"status" : "Homologado"
}
}, {
$unwind : "$requested_items"
}, {
$unwind : "$requested_items.winner"
}, {
$project : {
x : "$requested_items.status",
}
}, {
$group : {
_id : "$x",
numberOfdocs : {
$sum : 1
}
}
}, {
$match : {
"_id" : /acesssito/i
}
}
], {
allowDiskUse: true
});
链接地址: http://www.djcxy.com/p/92086.html
下一篇: 有组的时候MongoDB聚合框架很慢