Package boilerpipe: Specfile
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | Group: Development/Java # BEGIN SourceDeps(oneline): BuildRequires(pre): rpm-macros-java BuildRequires: rpm-build-java # END SourceDeps(oneline) BuildRequires: /proc BuildRequires: jpackage-generic-compat # see https://bugzilla.altlinux.org/show_bug.cgi?id=10382 %define _localstatedir %{_var} Name: boilerpipe Version: 1.2.0 Release: alt1_13jpp8 Summary: Boilerplate Removal and Fulltext Extraction from HTML pages License: ASL 2.0 Url: https://github.com/kohlschutter/boilerpipe Source0: http://boilerpipe.googlecode.com/files/%{name}-%{version}-src.tar.gz Source1: http://boilerpipe.googlecode.com/svn/repo/de/l3s/%{name}/%{name}/%{version}/%{name}-%{version}.pom # use system libraries Patch0: %{name}-1.2.0-libdir-patch # remove embedded nekohtml Patch1: %{name}-1.2.0-nekohtml-patch BuildRequires: ant BuildRequires: java-devel BuildRequires: javapackages-local BuildRequires: nekohtml BuildRequires: xerces-j2 BuildArch: noarch Source44: import.info %description The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page. The library already provides specific strategies for common tasks (for example: news article extraction) and may also be easily extended for individual problem settings. Extracting content is very fast (milliseconds), just needs the input document (no global or site-level information required) and is usually quite accurate. %package javadoc Group: Development/Java Summary: Javadoc for %{name} BuildArch: noarch %description javadoc This package contains javadoc for %{name}. %prep %setup -q find . -iname '*.jar' -delete find . -iname '*.class' -delete %patch0 -p0 cp %{SOURCE1} pom.xml %patch1 -p1 # fix non ASCII chars for s in src/main/de/l3s/boilerpipe/BoilerpipeInput.java \ src/main/de/l3s/boilerpipe/BoilerpipeInput.java \ src/main/de/l3s/boilerpipe/BoilerpipeFilter.java \ src/main/de/l3s/boilerpipe/BoilerpipeExtractor.java \ src/main/de/l3s/boilerpipe/BoilerpipeProcessingException.java \ src/main/de/l3s/boilerpipe/conditions/TextBlockCondition.java \ src/main/de/l3s/boilerpipe/document/TextBlock.java \ src/main/de/l3s/boilerpipe/document/TextDocumentStatistics.java \ src/main/de/l3s/boilerpipe/document/TextDocument.java \ src/main/de/l3s/boilerpipe/estimators/SimpleEstimator.java \ src/main/de/l3s/boilerpipe/extractors/LargestContentExtractor.java \ src/main/de/l3s/boilerpipe/extractors/DefaultExtractor.java \ src/main/de/l3s/boilerpipe/extractors/NumWordsRulesExtractor.java \ src/main/de/l3s/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java \ src/main/de/l3s/boilerpipe/extractors/ExtractorBase.java \ src/main/de/l3s/boilerpipe/extractors/ArticleSentencesExtractor.java \ src/main/de/l3s/boilerpipe/extractors/CommonExtractors.java \ src/main/de/l3s/boilerpipe/extractors/CanolaExtractor.java \ src/main/de/l3s/boilerpipe/extractors/ArticleExtractor.java \ src/main/de/l3s/boilerpipe/extractors/KeepEverythingExtractor.java \ src/main/de/l3s/boilerpipe/filters/english/HeuristicFilterBase.java \ src/main/de/l3s/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java \ src/main/de/l3s/boilerpipe/filters/english/TerminatingBlocksFinder.java \ src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java \ src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java \ src/main/de/l3s/boilerpipe/filters/english/DensityRulesClassifier.java \ src/main/de/l3s/boilerpipe/filters/english/MinFulltextWordsFilter.java \ src/main/de/l3s/boilerpipe/filters/english/NumWordsRulesClassifier.java \ src/main/de/l3s/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java \ src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java \ src/main/de/l3s/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java \ src/main/de/l3s/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java \ src/main/de/l3s/boilerpipe/filters/heuristics/LabelFusion.java \ src/main/de/l3s/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java \ src/main/de/l3s/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java \ src/main/de/l3s/boilerpipe/filters/heuristics/ContentFusion.java \ src/main/de/l3s/boilerpipe/filters/simple/MinWordsFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/LabelToBoilerplateFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/LabelToContentFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/InvertedFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/InvertedFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/MinClauseWordsFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/BoilerplateBlockFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java \ src/main/de/l3s/boilerpipe/filters/simple/MarkEverythingContentFilter.java \ src/main/de/l3s/boilerpipe/labels/DefaultLabels.java \ src/main/de/l3s/boilerpipe/labels/ConditionalLabelAction.java \ src/main/de/l3s/boilerpipe/labels/LabelAction.java \ src/main/de/l3s/boilerpipe/sax/BoilerpipeSAXInput.java \ src/main/de/l3s/boilerpipe/sax/HTMLHighlighter.java \ src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLContentHandler.java \ src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLParser.java \ src/main/de/l3s/boilerpipe/sax/TagActionMap.java \ src/main/de/l3s/boilerpipe/sax/InputSourceable.java \ src/main/de/l3s/boilerpipe/sax/HTMLDocument.java \ src/main/de/l3s/boilerpipe/sax/CommonTagActions.java \ src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java \ src/main/de/l3s/boilerpipe/sax/HTMLFetcher.java \ src/main/de/l3s/boilerpipe/sax/TagAction.java \ src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java \ src/main/de/l3s/boilerpipe/util/UnicodeTokenizer.java;do native2ascii -encoding UTF8 ${s} ${s} done %build ant -Dapp.javaversion=1.6 %install %mvn_artifact pom.xml dist/%{name}-%{version}.jar %mvn_file de.l3s.%{name}:%{name} %{name} %mvn_install -J javadoc/1.2 install -pm 644 dist/%{name}-demo-%{version}.jar \ %{buildroot}%{_javadir}/%{name}-demo.jar %files -f .mfiles %{_javadir}/%{name}-demo.jar %doc --no-dereference LICENSE.txt NOTICE.txt %files javadoc -f .mfiles-javadoc %doc --no-dereference LICENSE.txt NOTICE.txt %changelog * Sat May 25 2019 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_13jpp8 - new version * Tue Feb 05 2019 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_12jpp8 - fc29 update * Sun Apr 15 2018 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_11jpp8 - java update * Thu Nov 09 2017 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_10jpp8 - fc27 update * Thu Nov 02 2017 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_9jpp8 - new jpp release * Tue Nov 22 2016 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_7jpp8 - new fc release * Wed Feb 10 2016 Igor Vlasenko <viy@altlinux.ru> 1.2.0-alt1_6jpp8 - new version |