Examples of org.carrot2.core.Document

org.carrot2.core.Document
A document to be processed by the framework. Each document is a collection of fields carrying different bits of information, e.g. {@link #TITLE} or {@link #CONTENT_URL}.

        .getLexicalData(LanguageCode.MALTESE);


    for (String word : wordsToCheck.split(",")) {
      if (!lexicalData.isCommonWord(new MutableCharArray(word))
          && !lexicalData.isStopLabel(word)) {
        clusters.add(new Cluster(word));
      }
    }
  }

View Full Code Here


  @Test
  public void testSimple() throws Exception {
    //<start id="crt2.simple"/>
    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>


    /*

View Full Code Here

    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",

View Full Code Here

          if (highlt != null && highlt.length == 1) {
            snippet = highlt[0];
          }
        }
      }
      Document carrotDocument = new Document(getValue(sdoc, titleField),
              snippet, (String)sdoc.getFieldValue(urlField));
      carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
      result.add(carrotDocument);
    }


    return result;
  }

View Full Code Here

        {
            final List entries = feed.getEntries();
            for (Iterator it = entries.iterator(); it.hasNext();)
            {
                final SyndEntry entry = (SyndEntry) it.next();
                final Document document = new Document();


                document.setField(Document.TITLE, clean(entry.getTitle()));
                document.setField(Document.SUMMARY, clean(entry.getDescription()
                    .getValue()));
                document.setField(Document.CONTENT_URL, entry.getLink());


                response.results.add(document);
            }
        }

View Full Code Here

        final List<Document> outputDocuments = Lists
            .newArrayListWithCapacity(inputDocuments.size());
        
        for (Document document : inputDocuments)
        {
            final Document clonedDocument = document.clone();
            for (String fieldName : fields)
            {
                highlightQueryTerms(clonedDocument, fieldName, queryPattern);
            }
            outputDocuments.add(clonedDocument);

View Full Code Here


        if (etools.documents != null)
        {
            for (Document etoolsDocument : etools.documents)
            {
                final Document matchingGoogleDocument = googleDocumentsByUrl
                    .get(etoolsDocument.getField(Document.CONTENT_URL));
                if (matchingGoogleDocument != null)
                {
                    final List<String> sources = etoolsDocument
                        .getField(Document.SOURCES);
                    if (!sources.contains("Google"))
                    {
                        sources.add("Google");
                    }
                    matchingGoogleDocument.setField(Document.SOURCES, sources);
                }
                else
                {
                    response.results.add(etoolsDocument);
                }

View Full Code Here

         * document separately.
         */
        final List<Document> documents = Lists.newArrayList();
        for (Document document : SampleDocumentData.DOCUMENTS_DATA_MINING)
        {
            documents.add(new Document(document.getTitle(), document.getSummary(),
                document.getContentUrl(), LanguageCode.ENGLISH));
        }


        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)

View Full Code Here


            /* Prepare Carrot2 documents */
            final ArrayList<Document> documents = new ArrayList<Document>();
            for (String [] row : data)
            {
                documents.add(new Document(row[1], row[2], row[0]));
            }


            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();

View Full Code Here

        final List<Cluster> clusters = Lists.newArrayList();


        final ContaminationMetric metric = new ContaminationMetric();
        metric.documents = documents;


        final Document d1 = new Document();
        final Document d2 = documentWithPartitions("test");
        documents.add(d1);


        final Cluster c1 = new Cluster("test", d1);
        clusters.add(c1);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.carrot2.core.Document

com.carrotsearch.hppc.IntArrayList

com.carrotsearch.hppc.IntIntOpenHashMap

com.carrotsearch.hppc.IntStack

com.tamingtext.carrot2.Carrot2ExampleTest

org.apache.http.message.BasicNameValuePair

org.apache.lucene.search.IndexSearcher

org.apache.mahout.math.matrix.DoubleMatrix2D

org.apache.velocity.VelocityContext

org.carrot2.cli.batch.BatchApp

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.