11package org .icatproject .lucene ;
22
3- import java .util .Arrays ;
43import java .util .HashMap ;
5- import java .util .HashSet ;
64import java .util .Map ;
75import java .util .Set ;
86
97import org .apache .lucene .analysis .Analyzer ;
8+ import org .apache .lucene .analysis .core .KeywordAnalyzer ;
9+ import org .apache .lucene .analysis .miscellaneous .PerFieldAnalyzerWrapper ;
1010import org .apache .lucene .queryparser .flexible .standard .StandardQueryParser ;
1111import org .apache .lucene .queryparser .flexible .standard .config .StandardQueryConfigHandler ;
1212import org .apache .lucene .queryparser .flexible .standard .config .StandardQueryConfigHandler .ConfigurationKeys ;
13+ import org .icatproject .lucene .analyzers .IcatSeparatorAnalyzer ;
14+ import org .icatproject .lucene .analyzers .IcatSynonymAnalyzer ;
1315
1416public class DocumentMapping {
1517
@@ -26,121 +28,113 @@ public static class ParentRelationship {
2628 * @param parentName Name of the parent entity.
2729 * @param joiningField Field that joins the child to its parent.
2830 * @param cascadeDelete If the child is deleted, whether the parent onto which
29- * it is nested should be deleted wholesale or just have
30- * its fields pruned.
31- * @param fields Fields that should be updated by this relationship where
32- * the field is the same on parent and child.
31+ * it is nested should be deleted wholesale or just have
32+ * its fields pruned.
33+ * @param fieldMapping Fields that should be updated by this relationship. The
34+ * key and value will be the same for most fields, but for
35+ * some they will differ to allow fields to be flattened
36+ * across entities (e.g. dataset.name: name).
3337 */
34- public ParentRelationship (String parentName , String joiningField , boolean cascadeDelete , String ... fields ) {
38+ public ParentRelationship (String parentName , String joiningField , boolean cascadeDelete ,
39+ Map <String , String > fieldMapping ) {
3540 this .parentName = parentName ;
3641 this .joiningField = joiningField ;
3742 this .cascadeDelete = cascadeDelete ;
38- fieldMapping = new HashMap <>();
39- for (String field : fields ) {
40- fieldMapping .put (field , field );
41- }
42- }
43-
44- /**
45- * @param parentField Name on the parent, such as "dataset.name"
46- * @param childField Name on the child, such as "name"
47- */
48- public void mapField (String parentField , String childField ) {
49- fieldMapping .put (parentField , childField );
43+ this .fieldMapping = fieldMapping ;
5044 }
5145 }
5246
53- private static Analyzer analyzer = new IcatSynonymAnalyzer ();;
54-
55- public static final Set <String > doubleFields = new HashSet <>();
56- public static final Set <String > longFields = new HashSet <>();
57- public static final Set <String > sortFields = new HashSet <>();
58- public static final Set <String > textFields = new HashSet <>();
59- public static final Set <String > indexedEntities = new HashSet <>();
60- public static final Map <String , ParentRelationship []> relationships = new HashMap <>();
47+ public static final Set <String > doubleFields = Set .of ("numericValue" , "numericValueSI" , "rangeTop" , "rangeTopSI" ,
48+ "rangeBottom" , "rangeBottomSI" );
49+ public static final Set <String > longFields = Set .of ("date" , "startDate" , "endDate" , "dateTimeValue" ,
50+ "investigation.startDate" , "fileSize" , "fileCount" , "datafile.id" , "datafileFormat.id" , "dataset.id" ,
51+ "facility.id" , "facilityCycle.id" , "investigation.id" , "instrument.id" , "id" , "sample.id" ,
52+ "sample.investigation.id" , "sample.type.id" , "technique.id" , "type.id" , "user.id" );
53+ public static final Set <String > sortFields = Set .of ("datafile.id" , "datafileFormat.id" , "dataset.id" , "facility.id" ,
54+ "facilityCycle.id" , "investigation.id" , "instrument.id" , "id" , "sample.id" , "sample.investigation.id" ,
55+ "technique.id" , "type.id" , "user.id" , "date" , "name" , "stringValue" , "dateTimeValue" , "numericValue" ,
56+ "numericValueSI" , "fileSize" , "fileCount" );
57+ public static final Set <String > textFields = Set .of ("name" , "visitId" , "description" , "dataset.name" ,
58+ "investigation.name" , "instrument.name" , "instrument.fullName" , "datafileFormat.name" , "sample.name" ,
59+ "sample.type.name" , "technique.name" , "technique.description" , "technique.pid" , "title" , "summary" ,
60+ "facility.name" , "user.fullName" , "type.name" , "doi" );
61+ public static final Set <String > pathFields = Set .of ("location" );
62+ public static final Set <String > indexedEntities = Set .of ("Datafile" , "Dataset" , "Investigation" ,
63+ "DatafileParameter" , "DatasetParameter" , "DatasetTechnique" , "InstrumentScientist" ,
64+ "InvestigationFacilityCycle" , "InvestigationInstrument" , "InvestigationParameter" , "InvestigationUser" ,
65+ "Sample" , "SampleParameter" );
66+ public static final Map <String , ParentRelationship []> relationships = Map .ofEntries (
67+ Map .entry ("Instrument" , new ParentRelationship [] {
68+ new ParentRelationship ("InvestigationInstrument" , "instrument.id" , true ,
69+ Map .of ("instrument.name" , "instrument.name" , "instrument.fullName" , "instrument.fullName" )) }),
70+ Map .entry ("User" , new ParentRelationship [] {
71+ new ParentRelationship ("InvestigationUser" , "user.id" , true ,
72+ Map .of ("user.name" , "user.name" , "user.fullName" , "user.fullName" )),
73+ new ParentRelationship ("InstrumentScientist" , "user.id" , true ,
74+ Map .of ("user.name" , "user.name" , "user.fullName" , "user.fullName" )) }),
75+ Map .entry ("Sample" , new ParentRelationship [] {
76+ new ParentRelationship ("Dataset" , "sample.id" , false ,
77+ Map .of ("sample.name" , "sample.name" , "sample.investigation.id" , "sample.investigation.id" )),
78+ new ParentRelationship ("Datafile" , "sample.id" , false ,
79+ Map .of ("sample.name" , "sample.name" , "sample.investigation.id" , "sample.investigation.id" )) }),
80+ Map .entry ("SampleType" , new ParentRelationship [] {
81+ new ParentRelationship ("Sample" , "type.id" , true , Map .of ("type.name" , "type.name" )),
82+ new ParentRelationship ("Dataset" , "sample.type.id" , false ,
83+ Map .of ("sample.type.name" , "sample.type.name" )),
84+ new ParentRelationship ("Datafile" , "sample.type.id" , false ,
85+ Map .of ("sample.type.name" , "sample.type.name" )) }),
86+ Map .entry ("InvestigationType" , new ParentRelationship [] {
87+ new ParentRelationship ("Investigation" , "type.id" , true , Map .of ("type.name" , "type.name" )) }),
88+ Map .entry ("DatasetType" , new ParentRelationship [] {
89+ new ParentRelationship ("Dataset" , "type.id" , true , Map .of ("type.name" , "type.name" )) }),
90+ Map .entry ("DatafileFormat" , new ParentRelationship [] {
91+ new ParentRelationship ("Datafile" , "datafileFormat.id" , false ,
92+ Map .of ("datafileFormat.name" , "datafileFormat.name" )) }),
93+ Map .entry ("Facility" , new ParentRelationship [] {
94+ new ParentRelationship ("Investigation" , "facility.id" , true ,
95+ Map .of ("facility.name" , "facility.name" )) }),
96+ Map .entry ("ParameterType" , new ParentRelationship [] {
97+ new ParentRelationship ("DatafileParameter" , "type.id" , true ,
98+ Map .of ("type.name" , "type.name" , "type.units" , "type.units" )),
99+ new ParentRelationship ("DatasetParameter" , "type.id" , true ,
100+ Map .of ("type.name" , "type.name" , "type.units" , "type.units" )),
101+ new ParentRelationship ("InvestigationParameter" , "type.id" , true ,
102+ Map .of ("type.name" , "type.name" , "type.units" , "type.units" )),
103+ new ParentRelationship ("SampleParameter" , "type.id" , true ,
104+ Map .of ("type.name" , "type.name" , "type.units" , "type.units" )) }),
105+ Map .entry ("Technique" , new ParentRelationship [] {
106+ new ParentRelationship ("DatasetTechnique" , "technique.id" , true ,
107+ Map .of ("technique.name" , "technique.name" , "technique.description" , "technique.description" ,
108+ "technique.pid" , "technique.pid" )) }),
109+ Map .entry ("Investigation" , new ParentRelationship [] {
110+ new ParentRelationship ("Dataset" , "investigation.id" , true ,
111+ Map .of ("visitId" , "visitId" , "investigation.name" , "name" , "investigation.title" , "title" ,
112+ "investigation.startDate" , "startDate" )),
113+ new ParentRelationship ("Datafile" , "investigation.id" , true ,
114+ Map .of ("visitId" , "visitId" , "investigation.name" , "name" )) }),
115+ Map .entry ("Dataset" , new ParentRelationship [] {
116+ new ParentRelationship ("Datafile" , "dataset.id" , true , Map .of ("dataset.name" , "name" )) }));
61117
62118 public static final StandardQueryParser genericParser = buildParser ();
63119 public static final StandardQueryParser datafileParser = buildParser ("name" , "description" , "location" ,
64- "datafileFormat.name" , "visitId" , "sample.name" , "sample.type.name" , "doi" );
120+ "location.fileName" , "datafileFormat.name" , "visitId" , "sample.name" , "sample.type.name" , "doi" );
65121 public static final StandardQueryParser datasetParser = buildParser ("name" , "description" , "sample.name" ,
66122 "sample.type.name" , "type.name" , "visitId" , "doi" );
67123 public static final StandardQueryParser investigationParser = buildParser ("name" , "visitId" , "title" , "summary" ,
68124 "facility.name" , "type.name" , "doi" );
69125 public static final StandardQueryParser sampleParser = buildParser ("sample.name" , "sample.type.name" );
70126
71- static {
72- doubleFields .addAll (Arrays .asList ("numericValue" , "numericValueSI" , "rangeTop" , "rangeTopSI" , "rangeBottom" ,
73- "rangeBottomSI" ));
74- longFields .addAll (
75- Arrays .asList ("date" , "startDate" , "endDate" , "dateTimeValue" , "investigation.startDate" , "fileSize" ,
76- "fileCount" , "datafile.id" , "datafileFormat.id" , "dataset.id" , "facility.id" ,
77- "facilityCycle.id" , "investigation.id" , "instrument.id" , "id" , "sample.id" ,
78- "sample.investigation.id" , "sample.type.id" , "technique.id" , "type.id" , "user.id" ));
79- sortFields .addAll (
80- Arrays .asList ("datafile.id" , "datafileFormat.id" , "dataset.id" , "facility.id" , "facilityCycle.id" ,
81- "investigation.id" , "instrument.id" , "id" , "sample.id" , "sample.investigation.id" ,
82- "technique.id" , "type.id" , "user.id" , "date" , "name" , "stringValue" , "dateTimeValue" ,
83- "numericValue" , "numericValueSI" , "fileSize" , "fileCount" ));
84- textFields .addAll (Arrays .asList ("name" , "visitId" , "description" , "location" , "dataset.name" ,
85- "investigation.name" , "instrument.name" , "instrument.fullName" , "datafileFormat.name" , "sample.name" ,
86- "sample.type.name" , "technique.name" , "technique.description" , "technique.pid" , "title" , "summary" ,
87- "facility.name" , "user.fullName" , "type.name" , "doi" ));
88-
89- indexedEntities .addAll (Arrays .asList ("Datafile" , "Dataset" , "Investigation" , "DatafileParameter" ,
90- "DatasetParameter" , "DatasetTechnique" , "InstrumentScientist" , "InvestigationFacilityCycle" ,
91- "InvestigationInstrument" , "InvestigationParameter" , "InvestigationUser" , "Sample" , "SampleParameter" ));
92-
93- relationships .put ("Instrument" , new ParentRelationship [] {
94- new ParentRelationship ("InvestigationInstrument" , "instrument.id" , true , "instrument.name" ,
95- "instrument.fullName" ) });
96- relationships .put ("User" , new ParentRelationship [] {
97- new ParentRelationship ("InvestigationUser" , "user.id" , true , "user.name" , "user.fullName" ),
98- new ParentRelationship ("InstrumentScientist" , "user.id" , true , "user.name" , "user.fullName" ) });
99- relationships .put ("Sample" , new ParentRelationship [] {
100- new ParentRelationship ("Dataset" , "sample.id" , false , "sample.name" , "sample.investigation.id" ),
101- new ParentRelationship ("Datafile" , "sample.id" , false , "sample.name" , "sample.investigation.id" ) });
102- relationships .put ("SampleType" , new ParentRelationship [] {
103- new ParentRelationship ("Sample" , "type.id" , true , "type.name" ),
104- new ParentRelationship ("Dataset" , "sample.type.id" , false , "sample.type.name" ),
105- new ParentRelationship ("Datafile" , "sample.type.id" , false , "sample.type.name" ) });
106- relationships .put ("InvestigationType" ,
107- new ParentRelationship [] { new ParentRelationship ("Investigation" , "type.id" , true , "type.name" ) });
108- relationships .put ("DatasetType" ,
109- new ParentRelationship [] { new ParentRelationship ("Dataset" , "type.id" , true , "type.name" ) });
110- relationships .put ("DatafileFormat" ,
111- new ParentRelationship [] {
112- new ParentRelationship ("Datafile" , "datafileFormat.id" , false , "datafileFormat.name" ) });
113- relationships .put ("Facility" ,
114- new ParentRelationship [] { new ParentRelationship ("Investigation" , "facility.id" , true , "facility.name" ) });
115- relationships .put ("ParameterType" ,
116- new ParentRelationship [] {
117- new ParentRelationship ("DatafileParameter" , "type.id" , true , "type.name" , "type.units" ),
118- new ParentRelationship ("DatasetParameter" , "type.id" , true ,"type.name" , "type.units" ),
119- new ParentRelationship ("InvestigationParameter" , "type.id" , true , "type.name" , "type.units" ),
120- new ParentRelationship ("SampleParameter" , "type.id" , true , "type.name" , "type.units" ) });
121- relationships .put ("Technique" ,
122- new ParentRelationship [] { new ParentRelationship ("DatasetTechnique" , "technique.id" , true ,"technique.name" ,
123- "technique.description" , "technique.pid" ) });
124-
125- ParentRelationship investigationDatasetRelationship = new ParentRelationship ("Dataset" , "investigation.id" ,
126- true , "visitId" );
127- investigationDatasetRelationship .mapField ("investigation.name" , "name" );
128- investigationDatasetRelationship .mapField ("investigation.title" , "title" );
129- investigationDatasetRelationship .mapField ("investigation.startDate" , "startDate" );
130- ParentRelationship investigationDatafileRelationship = new ParentRelationship ("Datafile" , "investigation.id" ,
131- true ,"visitId" );
132- investigationDatafileRelationship .mapField ("investigation.name" , "name" );
133- relationships .put ("Investigation" , new ParentRelationship [] {investigationDatasetRelationship , investigationDatafileRelationship });
134-
135- ParentRelationship datasetDatafileRelationship = new ParentRelationship ("Datafile" , "dataset.id" , true );
136- datasetDatafileRelationship .mapField ("dataset.name" , "name" );
137- relationships .put ("Dataset" , new ParentRelationship [] { datasetDatafileRelationship });
138- }
139-
140127 private static StandardQueryParser buildParser (String ... defaultFields ) {
141- StandardQueryParser parser = new StandardQueryParser ();
128+ HashMap <String , Analyzer > analyzerMap = new HashMap <>();
129+ for (String pathField : pathFields ) {
130+ analyzerMap .put (pathField , new IcatSeparatorAnalyzer ("/" ));
131+ analyzerMap .put (pathField + ".exact" , new KeywordAnalyzer ());
132+ analyzerMap .put (pathField + ".fileName" , new IcatSeparatorAnalyzer ("." ));
133+ }
134+ PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper (new IcatSynonymAnalyzer (), analyzerMap );
135+ StandardQueryParser parser = new StandardQueryParser (analyzerWrapper );
136+
142137 StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler ) parser .getQueryConfigHandler ();
143- qpConf .set (ConfigurationKeys .ANALYZER , analyzer );
144138 qpConf .set (ConfigurationKeys .ALLOW_LEADING_WILDCARD , true );
145139 if (defaultFields .length > 0 ) {
146140 qpConf .set (ConfigurationKeys .MULTI_FIELDS , defaultFields );
0 commit comments