Last year I needed to store strings in a Java Map while using only a limited amount of memory.
The solution was rather simple: use a Lucene index to store the content on disk, which still provides reasonable access performance. Here is the Gist:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
/** Implementation of a map (possibly persistent) using <em>Apache Lucene</em> 5.0. */ | |
public class LuceneMap implements Map<String, String> { | |
private Directory luceneDirectory; | |
private Analyzer analyzer; | |
/** Name of the <em>Lucene</em> field used for storing the map keys */ | |
private String keyField; | |
/** Name of the <em>Lucene</em> field used for storing the map values */ | |
private String valueField; | |
/** | |
* Construction of a map using an index stored in the RAM with the default | |
* setup (the <em>Lucene</em> field names used are {@code key} and | |
* {@code value}). | |
* @throws IOException | |
*/ | |
public LuceneMap() throws IOException { | |
this(null); | |
} | |
/** | |
* Construction of a map using an index with the default | |
* setup (the <em>Lucene</em> field names used are {@code key} and | |
* {@code value}). | |
* @param fPath | |
* Path of the index in the filesystem, null value to use the RAM. | |
* @throws IOException | |
*/ | |
public LuceneMap(String fPath) | |
throws IOException { | |
this(fPath, "key", "value"); | |
} | |
/** | |
* Construction of a map using an index. | |
* @param fPath | |
* Path of the index in the filesystem, null value to use the RAM. | |
* @param kField | |
* Name of the <em>Lucene</em> field used for storing the map keys | |
* @param vField | |
* Name of the <em>Lucene</em> field used for storing the map values | |
* @throws IOException | |
* @throws NullPointerException | |
* If one of the field names is null | |
*/ | |
public LuceneMap(String fPath, String kField, String vField) | |
throws IOException { | |
if(kField == null)throw new NullPointerException(); | |
if(vField == null)throw new NullPointerException(); | |
if(fPath != null) { | |
Path path = FileSystems.getDefault().getPath(fPath); | |
luceneDirectory = FSDirectory.open(path); | |
} | |
else luceneDirectory = new RAMDirectory(); | |
analyzer = new StandardAnalyzer(); | |
keyField = kField; | |
valueField = vField; | |
// Pour initialisation des segments | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
IndexWriter luceneWriter = | |
new IndexWriter(luceneDirectory, config); | |
luceneWriter.commit(); | |
luceneWriter.close(); | |
} | |
@Override | |
public int size() { | |
try { | |
IndexReader luceneReader = | |
DirectoryReader.open(luceneDirectory); | |
luceneReader = DirectoryReader.open(luceneDirectory); | |
int result = luceneReader.numDocs(); | |
luceneReader.close(); | |
return result; | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
} | |
private Document getDocumentByKey(String key) { | |
QueryParser parser = new QueryParser(keyField, analyzer); | |
Query query; | |
try { | |
String str = null; | |
if(key != null) { | |
str = QueryParser.escape(key); | |
} | |
query = parser.parse(keyField + ":" + str); | |
IndexReader luceneReader = | |
DirectoryReader.open(luceneDirectory); | |
IndexSearcher luceneSearcher = new IndexSearcher(luceneReader); | |
ScoreDoc[] hits = | |
luceneSearcher.search(query, null, 1000).scoreDocs; | |
if(hits.length > 0) { | |
Document hitDoc = luceneSearcher.doc(hits[0].doc); | |
// Checking the key | |
String check = hitDoc.get(keyField); | |
if((check == null) || (!check.equals(key))) { | |
hitDoc = null; | |
} | |
luceneReader.close(); | |
return hitDoc; | |
} | |
else luceneReader.close(); | |
} catch (ParseException e) { | |
throw new IllegalArgumentException(e); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
return null; | |
} | |
private Document getDocumentByValue(String value) { | |
QueryParser parser = new QueryParser(valueField, analyzer); | |
Query query; | |
try { | |
String str = null; | |
if(value != null) { | |
str = QueryParser.escape(value); | |
} | |
query = parser.parse(valueField + ":" + str); | |
IndexReader luceneReader = | |
DirectoryReader.open(luceneDirectory); | |
IndexSearcher luceneSearcher = new IndexSearcher(luceneReader); | |
ScoreDoc[] hits = | |
luceneSearcher.search(query, null, 1000).scoreDocs; | |
if(hits.length > 0) { | |
Document hitDoc = luceneSearcher.doc(hits[0].doc); | |
luceneReader.close(); | |
return hitDoc; | |
} | |
else luceneReader.close(); | |
} catch (ParseException e) { | |
throw new IllegalArgumentException(e); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
return null; | |
} | |
@Override | |
public String get(Object key) { | |
if(key == null)return null; | |
Document doc = getDocumentByKey(key.toString()); | |
if(doc != null)return doc.get(valueField); | |
else return null; | |
} | |
@Override | |
public String put(String key, String value) { | |
Document previous = getDocumentByKey(key); | |
Document doc = new Document(); | |
doc.add(new Field(keyField, key, TextField.TYPE_STORED)); | |
doc.add(new Field(valueField, value, TextField.TYPE_STORED)); | |
String previousValue = null; | |
try { | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
IndexWriter luceneWriter = | |
new IndexWriter(luceneDirectory, config); | |
// Pour initialisation des segments | |
if(previous != null) { | |
previousValue = previous.get(valueField); | |
luceneWriter.updateDocument(new Term(keyField, key), doc); | |
} | |
else { | |
luceneWriter.addDocument(doc); | |
} | |
luceneWriter.commit(); | |
luceneWriter.close(); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
return previousValue; | |
} | |
@Override | |
public String remove(Object key) { | |
if(key == null)return null; | |
Document previous = getDocumentByKey(key.toString()); | |
String previousValue = null; | |
try { | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
IndexWriter luceneWriter = | |
new IndexWriter(luceneDirectory, config); | |
if(previous != null) { | |
previousValue = previous.get(valueField); | |
luceneWriter.deleteDocuments( | |
new Term(keyField, key.toString()) | |
); | |
} | |
luceneWriter.commit(); | |
luceneWriter.close(); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
return previousValue; | |
} | |
@Override | |
public boolean isEmpty() { | |
return size() == 0; | |
} | |
@Override | |
public boolean containsKey(Object key) { | |
return get(key) != null; | |
} | |
@Override | |
public void putAll(Map<? extends String, ? extends String> m) { | |
for(String key : m.keySet()) { | |
String value = m.get(key); | |
put(key, value); | |
} | |
} | |
@Override | |
public void clear() { | |
try { | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
IndexWriter luceneWriter = | |
new IndexWriter(luceneDirectory, config); | |
luceneWriter.deleteAll(); | |
luceneWriter.commit(); | |
luceneWriter.close(); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
} | |
@Override | |
public Set<String> keySet() { | |
try { | |
IndexReader luceneReader = DirectoryReader.open(luceneDirectory); | |
IndexSearcher luceneSearcher = new IndexSearcher(luceneReader); | |
int totalRecords = luceneReader.numDocs(); | |
int skipRecords = 0; | |
int takeRecords = 1000; | |
Set<String> result = new HashSet<String>(); | |
QueryParser parser = new QueryParser(keyField, analyzer); | |
Query query = parser.parse("*:*"); | |
while (skipRecords < totalRecords) { | |
TopDocs results = luceneSearcher.search(query, null, | |
skipRecords + takeRecords); | |
ScoreDoc[] scoreDocs = results.scoreDocs; | |
for (int i = skipRecords; i < results.totalHits; i++) { | |
if (i > (skipRecords + takeRecords) - 1) { | |
break; | |
} | |
Document doc = luceneSearcher.doc(scoreDocs[i].doc); | |
String key = doc.get(keyField); | |
result.add(key); | |
} | |
skipRecords += takeRecords; | |
} | |
luceneReader.close(); | |
return result; | |
} catch (ParseException e) { | |
throw new IllegalArgumentException(e); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
} | |
@Override | |
public Collection<String> values() { | |
try { | |
IndexReader luceneReader = DirectoryReader.open(luceneDirectory); | |
IndexSearcher luceneSearcher = new IndexSearcher(luceneReader); | |
int totalRecords = luceneReader.numDocs(); | |
int skipRecords = 0; | |
int takeRecords = 1000; | |
Collection<String> result = new ArrayList<String>(); | |
QueryParser parser = new QueryParser(valueField, analyzer); | |
Query query = parser.parse("*:*"); | |
while (skipRecords < totalRecords) { | |
TopDocs results = luceneSearcher.search(query, null, | |
skipRecords + takeRecords); | |
ScoreDoc[] scoreDocs = results.scoreDocs; | |
for (int i = skipRecords; i < results.totalHits; i++) { | |
if (i > (skipRecords + takeRecords) - 1) { | |
break; | |
} | |
Document doc = luceneSearcher.doc(scoreDocs[i].doc); | |
String key = doc.get(valueField); | |
result.add(key); | |
} | |
skipRecords += takeRecords; | |
} | |
luceneReader.close(); | |
return result; | |
} catch (ParseException e) { | |
throw new IllegalArgumentException(e); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
} | |
@Override | |
public Set<java.util.Map.Entry<String, String>> entrySet() { | |
try { | |
IndexReader luceneReader = DirectoryReader.open(luceneDirectory); | |
IndexSearcher luceneSearcher = new IndexSearcher(luceneReader); | |
int totalRecords = luceneReader.numDocs(); | |
int skipRecords = 0; | |
int takeRecords = 1000; | |
Set<java.util.Map.Entry<String, String>> result = | |
new HashSet<java.util.Map.Entry<String, String>>(); | |
QueryParser parser = new QueryParser(keyField, analyzer); | |
Query query = parser.parse("*:*"); | |
while (skipRecords < totalRecords) { | |
TopDocs results = luceneSearcher.search(query, null, | |
skipRecords + takeRecords); | |
ScoreDoc[] scoreDocs = results.scoreDocs; | |
for (int i = skipRecords; i < results.totalHits; i++) { | |
if (i > (skipRecords + takeRecords) - 1) { | |
break; | |
} | |
Document doc = luceneSearcher.doc(scoreDocs[i].doc); | |
String key = doc.get(keyField); | |
String value = doc.get(valueField); | |
result.add( | |
new AbstractMap.SimpleEntry<String, String>(key, value) | |
); | |
} | |
skipRecords += takeRecords; | |
} | |
luceneReader.close(); | |
return result; | |
} catch (ParseException e) { | |
throw new IllegalArgumentException(e); | |
} catch (IOException e) { | |
throw new IllegalStateException(e); | |
} | |
} | |
@Override | |
public boolean containsValue(Object value) { | |
if(value != null) { | |
return (getDocumentByValue(value.toString()) != null); | |
} | |
else return (getDocumentByValue(null) != null); | |
} | |
} |