MongoDBのGridFS

GridFSはMongoDBにファイルを格納できる。
使い方次第で色々使えそう。
メタデータはfilesコレクション(デフォルトではfs.files)に1ファイルにつき1ドキュメントとして格納し、データ本体はチャンク(デフォルトのサイズは256KB)に分割して、chunksコレクション(デフォルトではfs.chunks)に複数のドキュメントとして保存する。

コレクションの構造

files(fs.files)
{
  "_id" : <unspecified>,                  // unique ID for this file
  "length" : data_number,                 // size of the file in bytes
  "chunkSize" : data_number,              // size of each of the chunks.  Default is 256k
  "uploadDate" : data_date,               // date when object first stored
  "md5" : data_string                     // result of running the "filemd5" command on this file's chunks
}
chunks(fs.chunks)
{
  "_id" : <unspecified>,         // object id of the chunk in the _chunks collection
  "files_id" : <unspecified>,    // _id of the corresponding files collection entry
  "n" : chunk_number,            // chunks are numbered in order, starting with 0
  "data" : data_binary,          // the chunk's payload as a BSON binary type
}

mongofilesコマンド

put
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs put httpd-2.2.22.tar.gz 
connected to: localhost:27017
added file: { _id: ObjectId('4f6207ae8c80c3d8f3a1469e'), filename: "httpd-2.2.22.tar.gz", chunkSize: 262144, uploadDate: new Date(1331824558983), md5: "d77fa5af23df96a8af68ea8114fa6ce1", length: 7200529 }
done!
root@johan:~#
list
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs list
connected to: localhost:27017
httpd-2.2.22.tar.gz	7200529
root@johan:~# 
get
root@johan:/tmp# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs get httpd-2.2.22.tar.gz 
connected to: localhost:27017
done write to: httpd-2.2.22.tar.gz
root@johan:/tmp#
search
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs search httpd-2.2.22
connected to: localhost:27017
httpd-2.2.22.tar.gz	7200529
root@johan:~#
delete
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs delete httpd-2.2.22.tar.gz 
connected to: localhost:27017
done!
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs list
connected to: localhost:27017
root@johan:~#
mongoシェル

ファイルをput

root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs put httpd-2.2.22.tar.gz 
connected to: localhost:27017
added file: { _id: ObjectId('4f62099cb942b700c6b709ff'), filename: "httpd-2.2.22.tar.gz", chunkSize: 262144, uploadDate: new Date(1331825052497), md5: "d77fa5af23df96a8af68ea8114fa6ce1", length: 7200529 }
done!
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs put httpd-2.4.1.tar.gz 
connected to: localhost:27017
added file: { _id: ObjectId('4f6209a51f1c7d1a54c2b431'), filename: "httpd-2.4.1.tar.gz", chunkSize: 262144, uploadDate: new Date(1331825061450), md5: "4366afbea8149ca125af01fd59a2f8a2", length: 5562940 }
done!
root@johan:~# /usr/local/mongodb/bin/mongofiles -h localhost:27017 -d gridfs list
connected to: localhost:27017
httpd-2.2.22.tar.gz	7200529
httpd-2.4.1.tar.gz	5562940
root@johan:~#

シェルから見る

root@johan:~# /usr/local/mongodb/bin/mongo
MongoDB shell version: 2.0.2
connecting to: test
> use gridfs
switched to db gridfs
> show collections
fs.chunks
fs.files
system.indexes
> db.fs.files.find()
{ "_id" : ObjectId("4f62099cb942b700c6b709ff"), "filename" : "httpd-2.2.22.tar.gz", "chunkSize" : 262144, "uploadDate" : ISODate("2012-03-15T15:24:12.497Z"), "md5" : "d77fa5af23df96a8af68ea8114fa6ce1", "length" : 7200529 }
{ "_id" : ObjectId("4f6209a51f1c7d1a54c2b431"), "filename" : "httpd-2.4.1.tar.gz", "chunkSize" : 262144, "uploadDate" : ISODate("2012-03-15T15:24:21.450Z"), "md5" : "4366afbea8149ca125af01fd59a2f8a2", "length" : 5562940 }
> exit
bye
root@johan:~#

Pythonから使う

pymongoがインストールされていることが前提。
インストールされていない場合はpipやeasy_installでインストールする。
なお、以下の例ではfilenameを指定せずにfs.put()しているため、その直後のfs.list()の結果にはNoneが含まれる。

root@johan:~# python
Python 2.7.2+ (default, Oct  4 2011, 20:03:08) 
[GCC 4.6.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pymongo
>>> from gridfs import GridFS
>>> conn = pymongo.Connection('localhost', 27017)
>>> db = conn.gridfs
>>> fs = GridFS(db)
>>> fs.list()
[u'httpd-2.2.22.tar.gz', u'httpd-2.4.1.tar.gz']
>>> fp = open('/root/apache-jmeter-2.6.tgz', 'r')
>>> fid = fs.put(fp)
>>> fid
ObjectId('4f6210ae7fedcb1e3e000000')
>>> fs.list()
[None, u'httpd-2.2.22.tar.gz', u'httpd-2.4.1.tar.gz']
>>> fs.exists(fid)
True
>>> gf = fs.get(fid)
>>> gf
<gridfs.grid_file.GridOut object at 0x9acae2c>
>>> fs.delete(fid)
>>> fs.list()
[u'httpd-2.2.22.tar.gz', u'httpd-2.4.1.tar.gz']
>>> quit()
root@johan:~#