Removing duplicated in mongodb not behaving as expected -


i have table in mongodb have realized has duplicated due required data parsing update based on underlying source data.

due change in source, code having unexpected behavior , inserting many duplicates.

the following query should return single value:

db.opts.find({   $query: {     ticker: "vxx",     date: 20150423,     callput: "p",     strike: 27,     maturity: 20150424   },   $orderby: {     maturity: 1   } }) 

yet due bug in code, have multiple entries observation unfortunately. 1 of observations looks this:

{   "_id": objectid("55396c1c44fea47bde858c78"),   "date": 20150423,   "ticker": "vxx",   "callput": "p",   "last": 6.1,   "vol": 25,   "chg": 0.43,   "maturity": 20150424,   "symbol": "vxx150424p00027000",   "open int": 809,   "strike": 27,   "ask": 6.1,   "bid": 5.85 } 

now, goal remove duplicates share fields.

i tried running following:

db.opts.ensureindex({   date: 1,   ticker: 1,   callput: 1,   maturity: 1,   symbol: 1,   strike: 1 }, {   unique: true,   dropdups: true }) 

yet duplicates did not drop.

i additionally tried:

db.opts.createindex({   date: 1,   ticker: 1,   callput: 1,   maturity: 1,   symbol: 1,   strike: 1 }, {   unique: true,   dropdups: true }) 

i not have indices defined on these fields outside of documented above. collection quite vanilla created , insert data daily. nothing else yet.

the duplicate data looks this:

> db.opts.find({$query:{ticker:"vxx",date:20150423,callput:"p",strike:27}}) { "_id" : objectid("55396c1c44fea47bde858c78"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1c44fea47bde858cd1"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1c44fea47bde858d2a"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1d44fea47bde858d83"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1d44fea47bde858ddc"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1d44fea47bde858e35"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1e44fea47bde858e8e"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1e44fea47bde858ee7"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1e44fea47bde858f40"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1f44fea47bde858f99"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c1f44fea47bde858ff2"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2044fea47bde85904b"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2044fea47bde8590a4"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2044fea47bde8590fd"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2144fea47bde859156"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2144fea47bde8591af"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2244fea47bde859208"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2244fea47bde859261"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2244fea47bde8592ba"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } { "_id" : objectid("55396c2344fea47bde859313"), "date" : 20150423, "ticker" : "v xx", "callput" : "p", "last" : 6.1, "vol" : 25, "chg" : 0.43, "maturity" : 20150 424, "symbol" : "vxx150424p00027000", "open int" : 809, "strike" : 27, "ask" : 6 .1, "bid" : 5.85 } type "it" more > 

how remove these duplicates?

the dropdups option no longer available in 3.0, can pretty in shell little script iterates on whole collection , removes docs duplicate ones have same key values:

var keys = {}; db.opts.find().foreach(function(doc) {     // create key concatenates keys must unique.     var key = ''.concat(doc.date, doc.ticker, doc.callput, doc.maturity,                         doc.symbol, doc.strike);     if (keys[key]) {         // doc key has been seen, remove doc.         db.opts.remove({_id: doc._id});     } else {         keys[key] = true;     } }); 

obviously, make backup before doing in case doesn't work you're expecting.


Comments

Popular posts from this blog

asp.net mvc - SSO between MVCForum and Umbraco7 -

Python Tkinter keyboard using bind -

ubuntu - Selenium Node Not Connecting to Hub, Not Opening Port -